[llvm-branch-commits] [llvm] 7776c99 - [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbbp asm instructions

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jul 27 04:09:10 PDT 2020


Author: lewis-revill
Date: 2020-07-27T13:07:37+02:00
New Revision: 7776c991d06e1d84ffb5e709024bfff5e51f7e8e

URL: https://github.com/llvm/llvm-project/commit/7776c991d06e1d84ffb5e709024bfff5e51f7e8e
DIFF: https://github.com/llvm/llvm-project/commit/7776c991d06e1d84ffb5e709024bfff5e51f7e8e.diff

LOG: [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbbp asm instructions

This patch optimizes bit manipulation operations when the
+experimental-b target feature is enabled.
It adds matching of single-block instruction patterns to specific
bit-manip instructions belonging to both the permutation and the base
subsets of the experimental B extension of RISC-V.
It also adds the corresponding codegen tests.

This patch is based on Claire Wolf's proposal for the bit manipulation
extension of RISC-V:
https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf

Differential Revision: https://reviews.llvm.org/D79873

(cherry picked from commit 6144f0a1e52e7f5439a67267ca65f2d72c21aaa6)
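
As an illustration of the kind of source-level pattern these selections
target, consider a plain C++ rotate helper (a hypothetical example, not
taken from this patch). The shift/or idiom below is recognized as a rotate
node during SelectionDAG construction, and with the new patterns it can be
selected to a single ror/rori instruction instead of being expanded into
shifts and an or:

    #include <cstdint>

    // Hypothetical example: (x >> n) | (x << (32 - n)) is recognized as a
    // rotate right; with the B-extension patterns it selects to ror/rori.
    uint32_t rotr32(uint32_t x, unsigned n) {
      n &= 31;                                   // keep the amount in range
      return (x >> n) | (x << ((32 - n) & 31));  // rotate right by n
    }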

Added: 
    llvm/test/CodeGen/RISCV/rv32Zbbp.ll
    llvm/test/CodeGen/RISCV/rv64Zbbp.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 99e5135b424f..fd1a91f68802 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -272,6 +272,44 @@ bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
   return false;
 }
 
+// Check that it is a RORI (Rotate Right Immediate). We first check that
+// it is the right node tree:
+//
+//  (ROTL RS1, VC)
+//
+// The compiler translates immediate right rotations produced by calls to the
+// rotateright32/rotateright64 intrinsics into left rotations. Since a left
+// rotation can easily be emulated as a right rotation by negating the
+// constant, there is no encoding for ROLI.
+// We therefore select immediate left rotations as RORI with the complementary
+// constant:
+//
+//  Shamt == XLen - VC
+
+bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  MVT XLenVT = Subtarget->getXLenVT();
+  if (N.getOpcode() == ISD::ROTL) {
+    if (isa<ConstantSDNode>(N.getOperand(1))) {
+      if (XLenVT == MVT::i64) {
+        uint64_t VC = N.getConstantOperandVal(1);
+        Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
+                                          N.getOperand(1).getValueType());
+        RS1 = N.getOperand(0);
+        return true;
+      }
+      if (XLenVT == MVT::i32) {
+        uint32_t VC = N.getConstantOperandVal(1);
+        Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
+                                          N.getOperand(1).getValueType());
+        RS1 = N.getOperand(0);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
@@ -374,6 +412,53 @@ bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
   return false;
 }
 
+// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
+// We first check that it is the right node tree:
+//
+//  (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
+//                         (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
+//
+// Then we check that the constant operands respect these constraints:
+//
+// VC2 == 32 - VC1
+// VC3 == maskLeadingOnes<uint32_t>(VC2)
+//
+// where VC1 is the Shamt we need, VC2 is the complement of Shamt over 32,
+// and VC3 is a 32-bit mask of (32 - VC1) leading ones.
+
+bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      Subtarget->getXLenVT() == MVT::i64 &&
+      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+    if (N.getOperand(0).getOpcode() == ISD::OR) {
+      SDValue Or = N.getOperand(0);
+      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
+          Or.getOperand(1).getOpcode() == ISD::SRL) {
+        SDValue Shl = Or.getOperand(0);
+        SDValue Srl = Or.getOperand(1);
+        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
+          SDValue And = Srl.getOperand(0);
+          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+              isa<ConstantSDNode>(Shl.getOperand(1)) &&
+              isa<ConstantSDNode>(And.getOperand(1))) {
+            uint32_t VC1 = Srl.getConstantOperandVal(1);
+            uint32_t VC2 = Shl.getConstantOperandVal(1);
+            uint32_t VC3 = And.getConstantOperandVal(1);
+            if (VC2 == (32 - VC1) &&
+                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
+              RS1 = Shl.getOperand(0);
+              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
+                                              Srl.getOperand(1).getValueType());
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
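
As a side note, here is a minimal standalone sketch of the shift-amount
arithmetic behind SelectRORI and SelectRORIW above (illustrative code only,
not part of the patch; the helper names are hypothetical and
maskLeadingOnes32 merely approximates llvm::maskLeadingOnes<uint32_t>):

    #include <cassert>
    #include <cstdint>

    // A left rotation by VC of an XLen-bit value equals a right rotation by
    // XLen - VC, mirroring the Shamt == XLen - VC relation used above.
    static unsigned rotlToRoriShamt(unsigned XLen, unsigned VC) {
      assert(VC > 0 && VC < XLen && "rotation amount must be non-trivial");
      return XLen - VC;  // e.g. rotl x, 31 on RV32 becomes rori x, 1
    }

    // The VC3 constraint checked by SelectRORIW: a 32-bit mask whose N
    // leading (most significant) bits are set.
    static uint32_t maskLeadingOnes32(unsigned N) {
      return N == 0 ? 0 : ~uint32_t(0) << (32 - N);
    }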

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 4e382ee58500..bc1655b673d7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -47,9 +47,11 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
 
   bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
 
 // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c89bb21c9701..7cad9f9bd43e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -149,8 +149,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
-  setOperationAction(ISD::ROTL, XLenVT, Expand);
-  setOperationAction(ISD::ROTR, XLenVT, Expand);
+  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+    setOperationAction(ISD::ROTL, XLenVT, Expand);
+    setOperationAction(ISD::ROTR, XLenVT, Expand);
+  }
 
   if (!Subtarget.hasStdExtZbp())
     setOperationAction(ISD::BSWAP, XLenVT, Expand);

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 09d5f1ef856a..45eb41f93b2e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -638,21 +638,46 @@ def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
 //===----------------------------------------------------------------------===//
 def SLOIPat   : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
 def SROIPat   : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
+def RORIPat   : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
 def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
 def SLOIWPat  : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
 def SROIWPat  : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
+def RORIWPat  : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or  GPR:$rs1, (not GPR:$rs2)), (ORN  GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]
 
 let Predicates = [HasStdExtZbb] in {
 def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SLO GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SRO GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbb] in {
 def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
 def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
 } // Predicates = [HasStdExtZbb]
 
+// There's no encoding for roli in the current version of the 'B' extension
+// (v0.92) as it can be implemented with rori by negating the immediate.
+// For this reason we pattern-match only against rori[w].
+let Predicates = [HasStdExtZbbOrZbp] in
+def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (RORI GPR:$rs1, uimmlog2xlen:$shamt)>;
+
 let Predicates = [HasStdExtZbp, IsRV32] in {
 def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1),
               (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))),
@@ -772,6 +797,23 @@ def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
           (MAXU  GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbb]
 
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))),
+          (PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))),
+          (PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))),
+          (PACKU GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))),
+          (PACKU GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp] in
+def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00),
+              (and GPR:$rs1, 0x00FF)),
+          (PACKH GPR:$rs1, GPR:$rs2)>;
+
 let Predicates = [HasStdExtZbp, IsRV32] in {
 def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
                   (and GPR:$rs1, (i32 0xFF0000FF))),
@@ -831,12 +873,30 @@ def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SLOW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SROW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
+              (riscv_srlw (assertsexti32 GPR:$rs1),
+                          (sub (i64 0), (assertsexti32 GPR:$rs2)))),
+          (ROLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1),
+                          (sub (i64 0), (assertsexti32 GPR:$rs2))),
+              (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))),
+          (RORW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
 def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
 def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)),
                               GPR:$rs1),
@@ -898,3 +958,14 @@ def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
 // RV64 CTZ
 def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
 } // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)),
+                          (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)),
+                      i32),
+          (PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
+              (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000),
+                   (i64 16))),
+          (PACKUW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
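
For reference, the word-level semantics matched by the PACK, PACKU and PACKH
patterns above can be summarized for RV32 (XLEN == 32) with the following
sketch (illustrative C++ only, not part of the patch):

    #include <cstdint>

    // pack:  low half of rs1 in the low bits, low half of rs2 in the high bits.
    static uint32_t pack32(uint32_t rs1, uint32_t rs2) {
      return (rs1 & 0xFFFF) | (rs2 << 16);
    }

    // packu: the same, but taking the upper halves of rs1 and rs2.
    static uint32_t packu32(uint32_t rs1, uint32_t rs2) {
      return (rs1 >> 16) | (rs2 & 0xFFFF0000u);
    }

    // packh: low bytes of rs1 and rs2 packed into the low 16 bits.
    static uint32_t packh32(uint32_t rs1, uint32_t rs2) {
      return (rs1 & 0xFF) | ((rs2 << 8) & 0xFF00);
    }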

diff --git a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll
new file mode 100644
index 000000000000..0e6288928f0c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll
@@ -0,0 +1,892 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IBB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IBP
+
+define i32 @andn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: andn_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andn a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: andn_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    andn a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: andn_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    andn a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a3, a3
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: andn_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andn a0, a0, a2
+; RV32IB-NEXT:    andn a1, a1, a3
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: andn_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    andn a0, a0, a2
+; RV32IBB-NEXT:    andn a1, a1, a3
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: andn_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    andn a0, a0, a2
+; RV32IBP-NEXT:    andn a1, a1, a3
+; RV32IBP-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
+define i32 @orn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: orn_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: orn_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orn a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: orn_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    orn a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: orn_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orn a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %or = or i32 %neg, %a
+  ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: orn_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a3, a3
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: orn_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orn a0, a0, a2
+; RV32IB-NEXT:    orn a1, a1, a3
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: orn_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    orn a0, a0, a2
+; RV32IBB-NEXT:    orn a1, a1, a3
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: orn_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orn a0, a0, a2
+; RV32IBP-NEXT:    orn a1, a1, a3
+; RV32IBP-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %or = or i64 %neg, %a
+  ret i64 %or
+}
+
+define i32 @xnor_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: xnor_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: xnor_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    xnor a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: xnor_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    xnor a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: xnor_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    xnor a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %xor = xor i32 %neg, %b
+  ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: xnor_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    xor a1, a1, a3
+; RV32I-NEXT:    xor a0, a0, a2
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: xnor_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    xnor a0, a0, a2
+; RV32IB-NEXT:    xnor a1, a1, a3
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: xnor_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    xnor a0, a0, a2
+; RV32IBB-NEXT:    xnor a1, a1, a3
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: xnor_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    xnor a0, a0, a2
+; RV32IBP-NEXT:    xnor a1, a1, a3
+; RV32IBP-NEXT:    ret
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %neg, %b
+  ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @rol_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: rol_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a2, a0, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    srl a0, a0, a1
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: rol_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rol a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: rol_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    rol a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: rol_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rol a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: rol_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a3, a2, 63
+; RV32I-NEXT:    addi t1, a3, -32
+; RV32I-NEXT:    addi a6, zero, 31
+; RV32I-NEXT:    bltz t1, .LBB7_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sll a7, a0, t1
+; RV32I-NEXT:    j .LBB7_3
+; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    sll a4, a1, a2
+; RV32I-NEXT:    sub a3, a6, a3
+; RV32I-NEXT:    srli a5, a0, 1
+; RV32I-NEXT:    srl a3, a5, a3
+; RV32I-NEXT:    or a7, a4, a3
+; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:    neg a4, a2
+; RV32I-NEXT:    andi a5, a4, 63
+; RV32I-NEXT:    addi a3, a5, -32
+; RV32I-NEXT:    bltz a3, .LBB7_7
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    mv t0, zero
+; RV32I-NEXT:    bgez a3, .LBB7_8
+; RV32I-NEXT:  .LBB7_5:
+; RV32I-NEXT:    srl a3, a0, a4
+; RV32I-NEXT:    sub a4, a6, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a4
+; RV32I-NEXT:    or a4, a3, a1
+; RV32I-NEXT:    or a1, a7, t0
+; RV32I-NEXT:    bgez t1, .LBB7_9
+; RV32I-NEXT:  .LBB7_6:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB7_7:
+; RV32I-NEXT:    srl t0, a1, a4
+; RV32I-NEXT:    bltz a3, .LBB7_5
+; RV32I-NEXT:  .LBB7_8:
+; RV32I-NEXT:    srl a4, a1, a3
+; RV32I-NEXT:    or a1, a7, t0
+; RV32I-NEXT:    bltz t1, .LBB7_6
+; RV32I-NEXT:  .LBB7_9:
+; RV32I-NEXT:    or a0, zero, a4
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: rol_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi a3, a2, 63
+; RV32IB-NEXT:    addi t1, a3, -32
+; RV32IB-NEXT:    addi a6, zero, 31
+; RV32IB-NEXT:    bltz t1, .LBB7_2
+; RV32IB-NEXT:  # %bb.1:
+; RV32IB-NEXT:    sll a7, a0, t1
+; RV32IB-NEXT:    j .LBB7_3
+; RV32IB-NEXT:  .LBB7_2:
+; RV32IB-NEXT:    sll a4, a1, a2
+; RV32IB-NEXT:    sub a3, a6, a3
+; RV32IB-NEXT:    srli a5, a0, 1
+; RV32IB-NEXT:    srl a3, a5, a3
+; RV32IB-NEXT:    or a7, a4, a3
+; RV32IB-NEXT:  .LBB7_3:
+; RV32IB-NEXT:    neg a4, a2
+; RV32IB-NEXT:    andi a5, a4, 63
+; RV32IB-NEXT:    addi a3, a5, -32
+; RV32IB-NEXT:    bltz a3, .LBB7_7
+; RV32IB-NEXT:  # %bb.4:
+; RV32IB-NEXT:    mv t0, zero
+; RV32IB-NEXT:    bgez a3, .LBB7_8
+; RV32IB-NEXT:  .LBB7_5:
+; RV32IB-NEXT:    srl a3, a0, a4
+; RV32IB-NEXT:    sub a4, a6, a5
+; RV32IB-NEXT:    slli a1, a1, 1
+; RV32IB-NEXT:    sll a1, a1, a4
+; RV32IB-NEXT:    or a4, a3, a1
+; RV32IB-NEXT:    or a1, a7, t0
+; RV32IB-NEXT:    bgez t1, .LBB7_9
+; RV32IB-NEXT:  .LBB7_6:
+; RV32IB-NEXT:    sll a0, a0, a2
+; RV32IB-NEXT:    or a0, a0, a4
+; RV32IB-NEXT:    ret
+; RV32IB-NEXT:  .LBB7_7:
+; RV32IB-NEXT:    srl t0, a1, a4
+; RV32IB-NEXT:    bltz a3, .LBB7_5
+; RV32IB-NEXT:  .LBB7_8:
+; RV32IB-NEXT:    srl a4, a1, a3
+; RV32IB-NEXT:    or a1, a7, t0
+; RV32IB-NEXT:    bltz t1, .LBB7_6
+; RV32IB-NEXT:  .LBB7_9:
+; RV32IB-NEXT:    or a0, zero, a4
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: rol_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    andi a3, a2, 63
+; RV32IBB-NEXT:    addi t1, a3, -32
+; RV32IBB-NEXT:    addi a6, zero, 31
+; RV32IBB-NEXT:    bltz t1, .LBB7_2
+; RV32IBB-NEXT:  # %bb.1:
+; RV32IBB-NEXT:    sll a7, a0, t1
+; RV32IBB-NEXT:    j .LBB7_3
+; RV32IBB-NEXT:  .LBB7_2:
+; RV32IBB-NEXT:    sll a4, a1, a2
+; RV32IBB-NEXT:    sub a3, a6, a3
+; RV32IBB-NEXT:    srli a5, a0, 1
+; RV32IBB-NEXT:    srl a3, a5, a3
+; RV32IBB-NEXT:    or a7, a4, a3
+; RV32IBB-NEXT:  .LBB7_3:
+; RV32IBB-NEXT:    neg a4, a2
+; RV32IBB-NEXT:    andi a5, a4, 63
+; RV32IBB-NEXT:    addi a3, a5, -32
+; RV32IBB-NEXT:    bltz a3, .LBB7_7
+; RV32IBB-NEXT:  # %bb.4:
+; RV32IBB-NEXT:    mv t0, zero
+; RV32IBB-NEXT:    bgez a3, .LBB7_8
+; RV32IBB-NEXT:  .LBB7_5:
+; RV32IBB-NEXT:    srl a3, a0, a4
+; RV32IBB-NEXT:    sub a4, a6, a5
+; RV32IBB-NEXT:    slli a1, a1, 1
+; RV32IBB-NEXT:    sll a1, a1, a4
+; RV32IBB-NEXT:    or a4, a3, a1
+; RV32IBB-NEXT:    or a1, a7, t0
+; RV32IBB-NEXT:    bgez t1, .LBB7_9
+; RV32IBB-NEXT:  .LBB7_6:
+; RV32IBB-NEXT:    sll a0, a0, a2
+; RV32IBB-NEXT:    or a0, a0, a4
+; RV32IBB-NEXT:    ret
+; RV32IBB-NEXT:  .LBB7_7:
+; RV32IBB-NEXT:    srl t0, a1, a4
+; RV32IBB-NEXT:    bltz a3, .LBB7_5
+; RV32IBB-NEXT:  .LBB7_8:
+; RV32IBB-NEXT:    srl a4, a1, a3
+; RV32IBB-NEXT:    or a1, a7, t0
+; RV32IBB-NEXT:    bltz t1, .LBB7_6
+; RV32IBB-NEXT:  .LBB7_9:
+; RV32IBB-NEXT:    or a0, zero, a4
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: rol_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    andi a3, a2, 63
+; RV32IBP-NEXT:    addi t1, a3, -32
+; RV32IBP-NEXT:    addi a6, zero, 31
+; RV32IBP-NEXT:    bltz t1, .LBB7_2
+; RV32IBP-NEXT:  # %bb.1:
+; RV32IBP-NEXT:    sll a7, a0, t1
+; RV32IBP-NEXT:    j .LBB7_3
+; RV32IBP-NEXT:  .LBB7_2:
+; RV32IBP-NEXT:    sll a4, a1, a2
+; RV32IBP-NEXT:    sub a3, a6, a3
+; RV32IBP-NEXT:    srli a5, a0, 1
+; RV32IBP-NEXT:    srl a3, a5, a3
+; RV32IBP-NEXT:    or a7, a4, a3
+; RV32IBP-NEXT:  .LBB7_3:
+; RV32IBP-NEXT:    neg a4, a2
+; RV32IBP-NEXT:    andi a5, a4, 63
+; RV32IBP-NEXT:    addi a3, a5, -32
+; RV32IBP-NEXT:    bltz a3, .LBB7_7
+; RV32IBP-NEXT:  # %bb.4:
+; RV32IBP-NEXT:    mv t0, zero
+; RV32IBP-NEXT:    bgez a3, .LBB7_8
+; RV32IBP-NEXT:  .LBB7_5:
+; RV32IBP-NEXT:    srl a3, a0, a4
+; RV32IBP-NEXT:    sub a4, a6, a5
+; RV32IBP-NEXT:    slli a1, a1, 1
+; RV32IBP-NEXT:    sll a1, a1, a4
+; RV32IBP-NEXT:    or a4, a3, a1
+; RV32IBP-NEXT:    or a1, a7, t0
+; RV32IBP-NEXT:    bgez t1, .LBB7_9
+; RV32IBP-NEXT:  .LBB7_6:
+; RV32IBP-NEXT:    sll a0, a0, a2
+; RV32IBP-NEXT:    or a0, a0, a4
+; RV32IBP-NEXT:    ret
+; RV32IBP-NEXT:  .LBB7_7:
+; RV32IBP-NEXT:    srl t0, a1, a4
+; RV32IBP-NEXT:    bltz a3, .LBB7_5
+; RV32IBP-NEXT:  .LBB7_8:
+; RV32IBP-NEXT:    srl a4, a1, a3
+; RV32IBP-NEXT:    or a1, a7, t0
+; RV32IBP-NEXT:    bltz t1, .LBB7_6
+; RV32IBP-NEXT:  .LBB7_9:
+; RV32IBP-NEXT:    or a0, zero, a4
+; RV32IBP-NEXT:    ret
+  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @ror_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: ror_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a2, a0, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    sll a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: ror_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    ror a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: ror_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    ror a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: ror_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    ror a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: ror_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a3, a2, 63
+; RV32I-NEXT:    addi t1, a3, -32
+; RV32I-NEXT:    addi a6, zero, 31
+; RV32I-NEXT:    bltz t1, .LBB9_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl a7, a1, t1
+; RV32I-NEXT:    j .LBB9_3
+; RV32I-NEXT:  .LBB9_2:
+; RV32I-NEXT:    srl a4, a0, a2
+; RV32I-NEXT:    sub a3, a6, a3
+; RV32I-NEXT:    slli a5, a1, 1
+; RV32I-NEXT:    sll a3, a5, a3
+; RV32I-NEXT:    or a7, a4, a3
+; RV32I-NEXT:  .LBB9_3:
+; RV32I-NEXT:    neg a4, a2
+; RV32I-NEXT:    andi a5, a4, 63
+; RV32I-NEXT:    addi a3, a5, -32
+; RV32I-NEXT:    bltz a3, .LBB9_7
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    mv t0, zero
+; RV32I-NEXT:    bgez a3, .LBB9_8
+; RV32I-NEXT:  .LBB9_5:
+; RV32I-NEXT:    sll a3, a1, a4
+; RV32I-NEXT:    sub a4, a6, a5
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srl a0, a0, a4
+; RV32I-NEXT:    or a4, a3, a0
+; RV32I-NEXT:    or a0, t0, a7
+; RV32I-NEXT:    bgez t1, .LBB9_9
+; RV32I-NEXT:  .LBB9_6:
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a1, a4, a1
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB9_7:
+; RV32I-NEXT:    sll t0, a0, a4
+; RV32I-NEXT:    bltz a3, .LBB9_5
+; RV32I-NEXT:  .LBB9_8:
+; RV32I-NEXT:    sll a4, a0, a3
+; RV32I-NEXT:    or a0, t0, a7
+; RV32I-NEXT:    bltz t1, .LBB9_6
+; RV32I-NEXT:  .LBB9_9:
+; RV32I-NEXT:    or a1, a4, zero
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: ror_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi a3, a2, 63
+; RV32IB-NEXT:    addi t1, a3, -32
+; RV32IB-NEXT:    addi a6, zero, 31
+; RV32IB-NEXT:    bltz t1, .LBB9_2
+; RV32IB-NEXT:  # %bb.1:
+; RV32IB-NEXT:    srl a7, a1, t1
+; RV32IB-NEXT:    j .LBB9_3
+; RV32IB-NEXT:  .LBB9_2:
+; RV32IB-NEXT:    srl a4, a0, a2
+; RV32IB-NEXT:    sub a3, a6, a3
+; RV32IB-NEXT:    slli a5, a1, 1
+; RV32IB-NEXT:    sll a3, a5, a3
+; RV32IB-NEXT:    or a7, a4, a3
+; RV32IB-NEXT:  .LBB9_3:
+; RV32IB-NEXT:    neg a4, a2
+; RV32IB-NEXT:    andi a5, a4, 63
+; RV32IB-NEXT:    addi a3, a5, -32
+; RV32IB-NEXT:    bltz a3, .LBB9_7
+; RV32IB-NEXT:  # %bb.4:
+; RV32IB-NEXT:    mv t0, zero
+; RV32IB-NEXT:    bgez a3, .LBB9_8
+; RV32IB-NEXT:  .LBB9_5:
+; RV32IB-NEXT:    sll a3, a1, a4
+; RV32IB-NEXT:    sub a4, a6, a5
+; RV32IB-NEXT:    srli a0, a0, 1
+; RV32IB-NEXT:    srl a0, a0, a4
+; RV32IB-NEXT:    or a4, a3, a0
+; RV32IB-NEXT:    or a0, t0, a7
+; RV32IB-NEXT:    bgez t1, .LBB9_9
+; RV32IB-NEXT:  .LBB9_6:
+; RV32IB-NEXT:    srl a1, a1, a2
+; RV32IB-NEXT:    or a1, a4, a1
+; RV32IB-NEXT:    ret
+; RV32IB-NEXT:  .LBB9_7:
+; RV32IB-NEXT:    sll t0, a0, a4
+; RV32IB-NEXT:    bltz a3, .LBB9_5
+; RV32IB-NEXT:  .LBB9_8:
+; RV32IB-NEXT:    sll a4, a0, a3
+; RV32IB-NEXT:    or a0, t0, a7
+; RV32IB-NEXT:    bltz t1, .LBB9_6
+; RV32IB-NEXT:  .LBB9_9:
+; RV32IB-NEXT:    or a1, a4, zero
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: ror_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    andi a3, a2, 63
+; RV32IBB-NEXT:    addi t1, a3, -32
+; RV32IBB-NEXT:    addi a6, zero, 31
+; RV32IBB-NEXT:    bltz t1, .LBB9_2
+; RV32IBB-NEXT:  # %bb.1:
+; RV32IBB-NEXT:    srl a7, a1, t1
+; RV32IBB-NEXT:    j .LBB9_3
+; RV32IBB-NEXT:  .LBB9_2:
+; RV32IBB-NEXT:    srl a4, a0, a2
+; RV32IBB-NEXT:    sub a3, a6, a3
+; RV32IBB-NEXT:    slli a5, a1, 1
+; RV32IBB-NEXT:    sll a3, a5, a3
+; RV32IBB-NEXT:    or a7, a4, a3
+; RV32IBB-NEXT:  .LBB9_3:
+; RV32IBB-NEXT:    neg a4, a2
+; RV32IBB-NEXT:    andi a5, a4, 63
+; RV32IBB-NEXT:    addi a3, a5, -32
+; RV32IBB-NEXT:    bltz a3, .LBB9_7
+; RV32IBB-NEXT:  # %bb.4:
+; RV32IBB-NEXT:    mv t0, zero
+; RV32IBB-NEXT:    bgez a3, .LBB9_8
+; RV32IBB-NEXT:  .LBB9_5:
+; RV32IBB-NEXT:    sll a3, a1, a4
+; RV32IBB-NEXT:    sub a4, a6, a5
+; RV32IBB-NEXT:    srli a0, a0, 1
+; RV32IBB-NEXT:    srl a0, a0, a4
+; RV32IBB-NEXT:    or a4, a3, a0
+; RV32IBB-NEXT:    or a0, t0, a7
+; RV32IBB-NEXT:    bgez t1, .LBB9_9
+; RV32IBB-NEXT:  .LBB9_6:
+; RV32IBB-NEXT:    srl a1, a1, a2
+; RV32IBB-NEXT:    or a1, a4, a1
+; RV32IBB-NEXT:    ret
+; RV32IBB-NEXT:  .LBB9_7:
+; RV32IBB-NEXT:    sll t0, a0, a4
+; RV32IBB-NEXT:    bltz a3, .LBB9_5
+; RV32IBB-NEXT:  .LBB9_8:
+; RV32IBB-NEXT:    sll a4, a0, a3
+; RV32IBB-NEXT:    or a0, t0, a7
+; RV32IBB-NEXT:    bltz t1, .LBB9_6
+; RV32IBB-NEXT:  .LBB9_9:
+; RV32IBB-NEXT:    or a1, a4, zero
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: ror_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    andi a3, a2, 63
+; RV32IBP-NEXT:    addi t1, a3, -32
+; RV32IBP-NEXT:    addi a6, zero, 31
+; RV32IBP-NEXT:    bltz t1, .LBB9_2
+; RV32IBP-NEXT:  # %bb.1:
+; RV32IBP-NEXT:    srl a7, a1, t1
+; RV32IBP-NEXT:    j .LBB9_3
+; RV32IBP-NEXT:  .LBB9_2:
+; RV32IBP-NEXT:    srl a4, a0, a2
+; RV32IBP-NEXT:    sub a3, a6, a3
+; RV32IBP-NEXT:    slli a5, a1, 1
+; RV32IBP-NEXT:    sll a3, a5, a3
+; RV32IBP-NEXT:    or a7, a4, a3
+; RV32IBP-NEXT:  .LBB9_3:
+; RV32IBP-NEXT:    neg a4, a2
+; RV32IBP-NEXT:    andi a5, a4, 63
+; RV32IBP-NEXT:    addi a3, a5, -32
+; RV32IBP-NEXT:    bltz a3, .LBB9_7
+; RV32IBP-NEXT:  # %bb.4:
+; RV32IBP-NEXT:    mv t0, zero
+; RV32IBP-NEXT:    bgez a3, .LBB9_8
+; RV32IBP-NEXT:  .LBB9_5:
+; RV32IBP-NEXT:    sll a3, a1, a4
+; RV32IBP-NEXT:    sub a4, a6, a5
+; RV32IBP-NEXT:    srli a0, a0, 1
+; RV32IBP-NEXT:    srl a0, a0, a4
+; RV32IBP-NEXT:    or a4, a3, a0
+; RV32IBP-NEXT:    or a0, t0, a7
+; RV32IBP-NEXT:    bgez t1, .LBB9_9
+; RV32IBP-NEXT:  .LBB9_6:
+; RV32IBP-NEXT:    srl a1, a1, a2
+; RV32IBP-NEXT:    or a1, a4, a1
+; RV32IBP-NEXT:    ret
+; RV32IBP-NEXT:  .LBB9_7:
+; RV32IBP-NEXT:    sll t0, a0, a4
+; RV32IBP-NEXT:    bltz a3, .LBB9_5
+; RV32IBP-NEXT:  .LBB9_8:
+; RV32IBP-NEXT:    sll a4, a0, a3
+; RV32IBP-NEXT:    or a0, t0, a7
+; RV32IBP-NEXT:    bltz t1, .LBB9_6
+; RV32IBP-NEXT:  .LBB9_9:
+; RV32IBP-NEXT:    or a1, a4, zero
+; RV32IBP-NEXT:    ret
+  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+define i32 @rori_i32(i32 %a) nounwind {
+; RV32I-LABEL: rori_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 31
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: rori_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rori a0, a0, 1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: rori_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    rori a0, a0, 1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: rori_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rori a0, a0, 1
+; RV32IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+define i64 @rori_i64(i64 %a) nounwind {
+; RV32I-LABEL: rori_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a1, 31
+; RV32I-NEXT:    srli a3, a0, 1
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    slli a0, a0, 31
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: rori_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    addi a3, zero, 31
+; RV32IB-NEXT:    fsl a2, a1, a3, a0
+; RV32IB-NEXT:    fsl a1, a0, a3, a1
+; RV32IB-NEXT:    mv a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: rori_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    slli a2, a1, 31
+; RV32IBB-NEXT:    srli a3, a0, 1
+; RV32IBB-NEXT:    or a2, a3, a2
+; RV32IBB-NEXT:    srli a1, a1, 1
+; RV32IBB-NEXT:    slli a0, a0, 31
+; RV32IBB-NEXT:    or a1, a0, a1
+; RV32IBB-NEXT:    mv a0, a2
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: rori_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    slli a2, a1, 31
+; RV32IBP-NEXT:    srli a3, a0, 1
+; RV32IBP-NEXT:    or a2, a3, a2
+; RV32IBP-NEXT:    srli a1, a1, 1
+; RV32IBP-NEXT:    slli a0, a0, 31
+; RV32IBP-NEXT:    or a1, a0, a1
+; RV32IBP-NEXT:    mv a0, a2
+; RV32IBP-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define i32 @pack_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: pack_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a1, a1, 16
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: pack_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    pack a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: pack_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    pack a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: pack_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    pack a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %shl = and i32 %a, 65535
+  %shl1 = shl i32 %b, 16
+  %or = or i32 %shl1, %shl
+  ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: pack_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: pack_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    mv a1, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: pack_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    mv a1, a2
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: pack_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    mv a1, a2
+; RV32IBP-NEXT:    ret
+  %shl = and i64 %a, 4294967295
+  %shl1 = shl i64 %b, 32
+  %or = or i64 %shl1, %shl
+  ret i64 %or
+}
+
+define i32 @packu_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packu_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    lui a2, 1048560
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: packu_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    packu a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: packu_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    packu a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: packu_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    packu a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %shr = lshr i32 %a, 16
+  %shr1 = and i32 %b, -65536
+  %or = or i32 %shr1, %shr
+  ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @packu_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packu_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: packu_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    mv a0, a1
+; RV32IB-NEXT:    mv a1, a3
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: packu_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    mv a0, a1
+; RV32IBB-NEXT:    mv a1, a3
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: packu_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    mv a0, a1
+; RV32IBP-NEXT:    mv a1, a3
+; RV32IBP-NEXT:    ret
+  %shr = lshr i64 %a, 32
+  %shr1 = and i64 %b, -4294967296
+  %or = or i64 %shr1, %shr
+  ret i64 %or
+}
+
+define i32 @packh_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packh_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    slli a1, a1, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: packh_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    packh a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: packh_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    packh a0, a0, a1
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: packh_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    packh a0, a0, a1
+; RV32IBP-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = shl i32 %b, 8
+  %shl = and i32 %and1, 65280
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packh_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    slli a1, a2, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    mv a1, zero
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: packh_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    packh a0, a0, a2
+; RV32IB-NEXT:    mv a1, zero
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: packh_i64:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    packh a0, a0, a2
+; RV32IBB-NEXT:    mv a1, zero
+; RV32IBB-NEXT:    ret
+;
+; RV32IBP-LABEL: packh_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    packh a0, a0, a2
+; RV32IBP-NEXT:    mv a1, zero
+; RV32IBP-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = shl i64 %b, 8
+  %shl = and i64 %and1, 65280
+  %or = or i64 %shl, %and
+  ret i64 %or
+}

diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
new file mode 100644
index 000000000000..c3a6799739d2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
@@ -0,0 +1,517 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IBB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IBP
+
+define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: andn_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andn a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: andn_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    andn a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: andn_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    andn a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: andn_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andn a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: andn_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    andn a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: andn_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    andn a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
+define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: orn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: orn_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orn a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: orn_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    orn a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: orn_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orn a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %or = or i32 %neg, %a
+  ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: orn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: orn_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orn a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: orn_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    orn a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: orn_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orn a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %or = or i64 %neg, %a
+  ret i64 %or
+}
+
+define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: xnor_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: xnor_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    xnor a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: xnor_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    xnor a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: xnor_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    xnor a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %xor = xor i32 %neg, %b
+  ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: xnor_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: xnor_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    xnor a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: xnor_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    xnor a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: xnor_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    xnor a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %neg, %b
+  ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: rol_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sllw a2, a0, a1
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: rol_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rolw a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: rol_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    rolw a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: rol_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rolw a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: rol_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sll a2, a0, a1
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: rol_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rol a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: rol_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    rol a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: rol_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rol a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: ror_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a2, a0, a1
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: ror_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rorw a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: ror_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    rorw a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: ror_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rorw a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: ror_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a2, a0, a1
+; RV64I-NEXT:    neg a1, a1
+; RV64I-NEXT:    sll a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: ror_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    ror a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: ror_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    ror a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: ror_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    ror a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+define signext i32 @rori_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: rori_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsriw a0, a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: rori_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    roriw a0, a0, 1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: rori_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    roriw a0, a0, 1
+; RV64IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+define i64 @rori_i64(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: rori_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rori a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: rori_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    rori a0, a0, 1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: rori_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rori a0, a0, 1
+; RV64IBP-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: pack_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: pack_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    packw a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: pack_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    packw a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: pack_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    packw a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %shl = and i32 %a, 65535
+  %shl1 = shl i32 %b, 16
+  %or = or i32 %shl1, %shl
+  ret i32 %or
+}
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: pack_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: pack_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    pack a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: pack_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    pack a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: pack_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    pack a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %shl = and i64 %a, 4294967295
+  %shl1 = shl i64 %b, 32
+  %or = or i64 %shl1, %shl
+  ret i64 %or
+}
+
+define signext i32 @packu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: packu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 16
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: packu_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    packuw a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: packu_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    packuw a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: packu_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    packuw a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %shr = lshr i32 %a, 16
+  %shr1 = and i32 %b, -65536
+  %or = or i32 %shr1, %shr
+  ret i32 %or
+}
+
+define i64 @packu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a2, zero, -1
+; RV64I-NEXT:    slli a2, a2, 32
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: packu_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    packu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: packu_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    packu a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: packu_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    packu a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %shr = lshr i64 %a, 32
+  %shr1 = and i64 %b, -4294967296
+  %or = or i64 %shr1, %shr
+  ret i64 %or
+}
+
+define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: packh_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: packh_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    packh a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: packh_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    packh a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: packh_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    packh a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = shl i32 %b, 8
+  %shl = and i32 %and1, 65280
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packh_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: packh_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    packh a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: packh_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    packh a0, a0, a1
+; RV64IBB-NEXT:    ret
+;
+; RV64IBP-LABEL: packh_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    packh a0, a0, a1
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = shl i64 %b, 8
+  %shl = and i64 %and1, 65280
+  %or = or i64 %shl, %and
+  ret i64 %or
+}


        

