[llvm] c9c955a - [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbt asm instructions

Wed Jul 15 04:20:40 PDT 2020

Author: lewis-revill
Date: 2020-07-15T12:19:34+01:00
New Revision: c9c955ada8e65205312f2bc41b46eefa0e98b36c

URL: https://github.com/llvm/llvm-project/commit/c9c955ada8e65205312f2bc41b46eefa0e98b36c
DIFF: https://github.com/llvm/llvm-project/commit/c9c955ada8e65205312f2bc41b46eefa0e98b36c.diff

LOG: [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbt asm instructions

This patch provides optimization of bit manipulation operations by
enabling the +experimental-b target feature.
It adds matching of single block patterns of instructions to specific
bit-manip instructions from the ternary subset (zbt subextension) of the
experimental B extension of RISC-V.
It also adds the corresponding codegen tests.

This patch is based on Claire Wolf's proposal for the bit manipulation
extension of RISCV:
https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf

Differential Revision: https://reviews.llvm.org/D79875

Added: 
    llvm/test/CodeGen/RISCV/rv32Zbt.ll
    llvm/test/CodeGen/RISCV/rv64Zbt.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index fd1a91f68802..7570385e38e3 100644

--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -459,6 +459,55 @@ bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
   return false;
 }
 
+// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
+// We first check that it is the right node tree:
+//
+//  (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
+//                         (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
+//
+// (RS1 and RS2 are taken as-is; the AssertSext nodes are not checked.) Then
+// we check that the constant operands respect these constraints:
+//
+// VC1 <= 32
+// VC2 == 32 - VC1
+// VC3 == maskLeadingOnes<uint32_t>(VC2), with bits 63:32 of VC3 all zero
+//
+// VC1 being the Shamt we need, VC2 its complement over 32 and VC3 a mask of
+// (32 - VC1) leading ones within the low 32 bits. The constants are compared
+// at full 64-bit width: truncating VC3 would accept a mask that lets bits
+// 32 and up of RS2 be shifted into the result.
+
+bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
+                                    SDValue &Shamt) {
+  if (N.getOpcode() != ISD::SIGN_EXTEND_INREG ||
+      Subtarget->getXLenVT() != MVT::i64 ||
+      cast<VTSDNode>(N.getOperand(1))->getVT() != MVT::i32)
+    return false;
+  // The extended value must be (or (shl ...), (srl ...)), in that order.
+  SDValue Or = N.getOperand(0);
+  if (Or.getOpcode() != ISD::OR || Or.getOperand(0).getOpcode() != ISD::SHL ||
+      Or.getOperand(1).getOpcode() != ISD::SRL)
+    return false;
+  SDValue Shl = Or.getOperand(0);
+  SDValue Srl = Or.getOperand(1);
+  SDValue And = Srl.getOperand(0);
+  if (And.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Srl.getOperand(1)) ||
+      !isa<ConstantSDNode>(Shl.getOperand(1)) ||
+      !isa<ConstantSDNode>(And.getOperand(1)))
+    return false;
+  uint64_t VC1 = Srl.getConstantOperandVal(1);
+  uint64_t VC2 = Shl.getConstantOperandVal(1);
+  uint64_t VC3 = And.getConstantOperandVal(1);
+  // Reject VC1 > 32 first so that 32 - VC1 cannot wrap around.
+  if (VC1 > 32 || VC2 != 32 - VC1 || VC3 != maskLeadingOnes<uint32_t>(VC2))
+    return false;
+  RS1 = Shl.getOperand(0);
+  RS2 = And.getOperand(0);
+  Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
+                                    Srl.getOperand(1).getValueType());
+  return true;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)

diff  --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index bc1655b673d7..0ca12510a230 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -52,6 +52,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt);
 
 // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7cad9f9bd43e..03d9eefd59d0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -166,6 +166,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtZbp())
     setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
 
+  if (Subtarget.hasStdExtZbt()) {
+    setOperationAction(ISD::FSHL, XLenVT, Legal);
+    setOperationAction(ISD::FSHR, XLenVT, Legal);
+  }
+
   ISD::CondCode FPCCToExtend[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index aa1ed7ff79cd..afac509f743d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -643,6 +643,7 @@ def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
 def SLOIWPat  : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
 def SROIWPat  : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
 def RORIWPat  : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
+def FSRIWPat  : ComplexPattern<i64, 3, "SelectFSRIW", [sext_inreg]>;
 
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -804,6 +805,19 @@ def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
+let Predicates = [HasStdExtZbt] in {
+def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
+          (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1),
+          (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3),
+          (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3),
+          (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
+          (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbt]
+
 let Predicates = [HasStdExtZbb] in {
 def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
 def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
@@ -1004,6 +1018,31 @@ def : Pat<(sra (bswap GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 24))>;
 def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
+                          (i64 0),
+                          (i64 17),
+                          (assertsexti32 GPR:$rs1),
+                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
+                                          (and (assertsexti32 GPR:$rs3), 31)),
+                              (riscv_srlw (assertsexti32 GPR:$rs2),
+                                          (sub (i64 32),
+                                               (assertsexti32 GPR:$rs3))))),
+          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
+                          (i64 0),
+                          (i64 17),
+                          (assertsexti32 GPR:$rs2),
+                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
+                                          (sub (i64 32),
+                                               (assertsexti32 GPR:$rs3))),
+                              (riscv_srlw (assertsexti32 GPR:$rs2),
+                                          (and (assertsexti32 GPR:$rs3), 31)))),
+          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
+          (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbt, IsRV64]
+
 let Predicates = [HasStdExtZbb, IsRV64] in {
 def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
           (CLZW GPR:$rs1)>;

diff  --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
new file mode 100644
index 000000000000..54b5b79778f4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
@@ -0,0 +1,569 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IBT
+
+define i32 @cmix_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32I-LABEL: cmix_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: cmix_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    cmix a0, a1, a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: cmix_i32:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    cmix a0, a1, a0, a2
+; RV32IBT-NEXT:    ret
+  %and = and i32 %b, %a
+  %neg = xor i32 %b, -1
+  %and1 = and i32 %neg, %c
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: cmix_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    not a3, a3
+; RV32I-NEXT:    and a3, a3, a5
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: cmix_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    cmix a0, a2, a0, a4
+; RV32IB-NEXT:    cmix a1, a3, a1, a5
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: cmix_i64:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    cmix a0, a2, a0, a4
+; RV32IBT-NEXT:    cmix a1, a3, a1, a5
+; RV32IBT-NEXT:    ret
+  %and = and i64 %b, %a
+  %neg = xor i64 %b, -1
+  %and1 = and i64 %neg, %c
+  %or = or i64 %and1, %and
+  ret i64 %or
+}
+
+define i32 @cmov_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32I-LABEL: cmov_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beqz a1, .LBB2_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: cmov_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    cmov a0, a1, a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: cmov_i32:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    cmov a0, a1, a0, a2
+; RV32IBT-NEXT:    ret
+  %tobool.not = icmp eq i32 %b, 0
+  %cond = select i1 %tobool.not, i32 %c, i32 %a
+  ret i32 %cond
+}
+
+define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: cmov_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    beqz a2, .LBB3_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    mv a5, a1
+; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    mv a1, a5
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: cmov_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    or a2, a2, a3
+; RV32IB-NEXT:    cmov a0, a2, a0, a4
+; RV32IB-NEXT:    cmov a1, a2, a1, a5
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: cmov_i64:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    or a2, a2, a3
+; RV32IBT-NEXT:    cmov a0, a2, a0, a4
+; RV32IBT-NEXT:    cmov a1, a2, a1, a5
+; RV32IBT-NEXT:    ret
+  %tobool.not = icmp eq i64 %b, 0
+  %cond = select i1 %tobool.not, i64 %c, i64 %a
+  ret i64 %cond
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32I-LABEL: fshl_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a3, a2, 31
+; RV32I-NEXT:    beqz a3, .LBB4_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    addi a2, zero, 32
+; RV32I-NEXT:    sub a2, a2, a3
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:  .LBB4_2:
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshl_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    fsl a0, a0, a2, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshl_i32:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    fsl a0, a0, a2, a1
+; RV32IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+; As i64 code patterns are not matched directly on RV32, some i64 patterns
+; do not yet have an efficient pattern match to bit manipulation
+; instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions that can match this pattern more efficiently.
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: fshl_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi t1, a4, 63
+; RV32I-NEXT:    addi a6, t1, -32
+; RV32I-NEXT:    addi a7, zero, 31
+; RV32I-NEXT:    bltz a6, .LBB5_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sll t0, a0, a6
+; RV32I-NEXT:    j .LBB5_3
+; RV32I-NEXT:  .LBB5_2:
+; RV32I-NEXT:    sll t0, a1, a4
+; RV32I-NEXT:    sub t2, a7, t1
+; RV32I-NEXT:    srli a5, a0, 1
+; RV32I-NEXT:    srl a5, a5, t2
+; RV32I-NEXT:    or t0, t0, a5
+; RV32I-NEXT:  .LBB5_3:
+; RV32I-NEXT:    addi a5, zero, 32
+; RV32I-NEXT:    sub t4, a5, t1
+; RV32I-NEXT:    addi a5, zero, 64
+; RV32I-NEXT:    sub t2, a5, t1
+; RV32I-NEXT:    bltz t4, .LBB5_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    mv t3, zero
+; RV32I-NEXT:    bnez t1, .LBB5_6
+; RV32I-NEXT:    j .LBB5_7
+; RV32I-NEXT:  .LBB5_5:
+; RV32I-NEXT:    srl t3, a3, t2
+; RV32I-NEXT:    beqz t1, .LBB5_7
+; RV32I-NEXT:  .LBB5_6:
+; RV32I-NEXT:    or a1, t0, t3
+; RV32I-NEXT:  .LBB5_7:
+; RV32I-NEXT:    bltz t4, .LBB5_10
+; RV32I-NEXT:  # %bb.8:
+; RV32I-NEXT:    srl a2, a3, t4
+; RV32I-NEXT:    bgez a6, .LBB5_11
+; RV32I-NEXT:  .LBB5_9:
+; RV32I-NEXT:    sll a3, a0, a4
+; RV32I-NEXT:    bnez t1, .LBB5_12
+; RV32I-NEXT:    j .LBB5_13
+; RV32I-NEXT:  .LBB5_10:
+; RV32I-NEXT:    srl a2, a2, t2
+; RV32I-NEXT:    sub a5, a7, t2
+; RV32I-NEXT:    slli a3, a3, 1
+; RV32I-NEXT:    sll a3, a3, a5
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    bltz a6, .LBB5_9
+; RV32I-NEXT:  .LBB5_11:
+; RV32I-NEXT:    mv a3, zero
+; RV32I-NEXT:    beqz t1, .LBB5_13
+; RV32I-NEXT:  .LBB5_12:
+; RV32I-NEXT:    or a0, a3, a2
+; RV32I-NEXT:  .LBB5_13:
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshl_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi t1, a4, 63
+; RV32IB-NEXT:    addi a6, t1, -32
+; RV32IB-NEXT:    addi a7, zero, 31
+; RV32IB-NEXT:    bltz a6, .LBB5_2
+; RV32IB-NEXT:  # %bb.1:
+; RV32IB-NEXT:    sll t0, a0, a6
+; RV32IB-NEXT:    j .LBB5_3
+; RV32IB-NEXT:  .LBB5_2:
+; RV32IB-NEXT:    sll t0, a1, a4
+; RV32IB-NEXT:    sub t2, a7, t1
+; RV32IB-NEXT:    srli a5, a0, 1
+; RV32IB-NEXT:    srl a5, a5, t2
+; RV32IB-NEXT:    or t0, t0, a5
+; RV32IB-NEXT:  .LBB5_3:
+; RV32IB-NEXT:    addi a5, zero, 32
+; RV32IB-NEXT:    sub t4, a5, t1
+; RV32IB-NEXT:    addi a5, zero, 64
+; RV32IB-NEXT:    sub t2, a5, t1
+; RV32IB-NEXT:    bltz t4, .LBB5_7
+; RV32IB-NEXT:  # %bb.4:
+; RV32IB-NEXT:    mv t3, zero
+; RV32IB-NEXT:    or t0, t0, t3
+; RV32IB-NEXT:    bgez t4, .LBB5_8
+; RV32IB-NEXT:  .LBB5_5:
+; RV32IB-NEXT:    srl a2, a2, t2
+; RV32IB-NEXT:    sub a5, a7, t2
+; RV32IB-NEXT:    slli a3, a3, 1
+; RV32IB-NEXT:    sll a3, a3, a5
+; RV32IB-NEXT:    or a2, a2, a3
+; RV32IB-NEXT:    cmov a1, t1, t0, a1
+; RV32IB-NEXT:    bgez a6, .LBB5_9
+; RV32IB-NEXT:  .LBB5_6:
+; RV32IB-NEXT:    sll a3, a0, a4
+; RV32IB-NEXT:    j .LBB5_10
+; RV32IB-NEXT:  .LBB5_7:
+; RV32IB-NEXT:    srl t3, a3, t2
+; RV32IB-NEXT:    or t0, t0, t3
+; RV32IB-NEXT:    bltz t4, .LBB5_5
+; RV32IB-NEXT:  .LBB5_8:
+; RV32IB-NEXT:    srl a2, a3, t4
+; RV32IB-NEXT:    cmov a1, t1, t0, a1
+; RV32IB-NEXT:    bltz a6, .LBB5_6
+; RV32IB-NEXT:  .LBB5_9:
+; RV32IB-NEXT:    mv a3, zero
+; RV32IB-NEXT:  .LBB5_10:
+; RV32IB-NEXT:    or a2, a3, a2
+; RV32IB-NEXT:    cmov a0, t1, a2, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshl_i64:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    andi t1, a4, 63
+; RV32IBT-NEXT:    addi a6, t1, -32
+; RV32IBT-NEXT:    addi a7, zero, 31
+; RV32IBT-NEXT:    bltz a6, .LBB5_2
+; RV32IBT-NEXT:  # %bb.1:
+; RV32IBT-NEXT:    sll t0, a0, a6
+; RV32IBT-NEXT:    j .LBB5_3
+; RV32IBT-NEXT:  .LBB5_2:
+; RV32IBT-NEXT:    sll t0, a1, a4
+; RV32IBT-NEXT:    sub t2, a7, t1
+; RV32IBT-NEXT:    srli a5, a0, 1
+; RV32IBT-NEXT:    srl a5, a5, t2
+; RV32IBT-NEXT:    or t0, t0, a5
+; RV32IBT-NEXT:  .LBB5_3:
+; RV32IBT-NEXT:    addi a5, zero, 32
+; RV32IBT-NEXT:    sub t4, a5, t1
+; RV32IBT-NEXT:    addi a5, zero, 64
+; RV32IBT-NEXT:    sub t2, a5, t1
+; RV32IBT-NEXT:    bltz t4, .LBB5_7
+; RV32IBT-NEXT:  # %bb.4:
+; RV32IBT-NEXT:    mv t3, zero
+; RV32IBT-NEXT:    or t0, t0, t3
+; RV32IBT-NEXT:    bgez t4, .LBB5_8
+; RV32IBT-NEXT:  .LBB5_5:
+; RV32IBT-NEXT:    srl a2, a2, t2
+; RV32IBT-NEXT:    sub a5, a7, t2
+; RV32IBT-NEXT:    slli a3, a3, 1
+; RV32IBT-NEXT:    sll a3, a3, a5
+; RV32IBT-NEXT:    or a2, a2, a3
+; RV32IBT-NEXT:    cmov a1, t1, t0, a1
+; RV32IBT-NEXT:    bgez a6, .LBB5_9
+; RV32IBT-NEXT:  .LBB5_6:
+; RV32IBT-NEXT:    sll a3, a0, a4
+; RV32IBT-NEXT:    j .LBB5_10
+; RV32IBT-NEXT:  .LBB5_7:
+; RV32IBT-NEXT:    srl t3, a3, t2
+; RV32IBT-NEXT:    or t0, t0, t3
+; RV32IBT-NEXT:    bltz t4, .LBB5_5
+; RV32IBT-NEXT:  .LBB5_8:
+; RV32IBT-NEXT:    srl a2, a3, t4
+; RV32IBT-NEXT:    cmov a1, t1, t0, a1
+; RV32IBT-NEXT:    bltz a6, .LBB5_6
+; RV32IBT-NEXT:  .LBB5_9:
+; RV32IBT-NEXT:    mv a3, zero
+; RV32IBT-NEXT:  .LBB5_10:
+; RV32IBT-NEXT:    or a2, a3, a2
+; RV32IBT-NEXT:    cmov a0, t1, a2, a0
+; RV32IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32I-LABEL: fshr_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a3, a2, 31
+; RV32I-NEXT:    beqz a3, .LBB6_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    addi a2, zero, 32
+; RV32I-NEXT:    sub a2, a2, a3
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshr_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    fsr a0, a0, a2, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshr_i32:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    fsr a0, a0, a2, a1
+; RV32IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+; As i64 code patterns are not matched directly on RV32, some i64 patterns
+; do not yet have an efficient pattern match to bit manipulation
+; instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions that can match this pattern more efficiently.
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: fshr_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv t1, a3
+; RV32I-NEXT:    mv a6, a2
+; RV32I-NEXT:    andi a5, a4, 63
+; RV32I-NEXT:    addi t2, a5, -32
+; RV32I-NEXT:    addi a7, zero, 31
+; RV32I-NEXT:    bltz t2, .LBB7_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl t0, t1, t2
+; RV32I-NEXT:    j .LBB7_3
+; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    srl t0, a6, a4
+; RV32I-NEXT:    sub a3, a7, a5
+; RV32I-NEXT:    slli a2, t1, 1
+; RV32I-NEXT:    sll a2, a2, a3
+; RV32I-NEXT:    or t0, t0, a2
+; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:    addi a2, zero, 32
+; RV32I-NEXT:    sub a3, a2, a5
+; RV32I-NEXT:    addi a2, zero, 64
+; RV32I-NEXT:    sub a2, a2, a5
+; RV32I-NEXT:    bltz a3, .LBB7_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    mv t3, zero
+; RV32I-NEXT:    bnez a5, .LBB7_6
+; RV32I-NEXT:    j .LBB7_7
+; RV32I-NEXT:  .LBB7_5:
+; RV32I-NEXT:    sll t3, a0, a2
+; RV32I-NEXT:    beqz a5, .LBB7_7
+; RV32I-NEXT:  .LBB7_6:
+; RV32I-NEXT:    or a6, t3, t0
+; RV32I-NEXT:  .LBB7_7:
+; RV32I-NEXT:    bltz a3, .LBB7_10
+; RV32I-NEXT:  # %bb.8:
+; RV32I-NEXT:    sll a0, a0, a3
+; RV32I-NEXT:    bgez t2, .LBB7_11
+; RV32I-NEXT:  .LBB7_9:
+; RV32I-NEXT:    srl a1, t1, a4
+; RV32I-NEXT:    bnez a5, .LBB7_12
+; RV32I-NEXT:    j .LBB7_13
+; RV32I-NEXT:  .LBB7_10:
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    sub a2, a7, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    bltz t2, .LBB7_9
+; RV32I-NEXT:  .LBB7_11:
+; RV32I-NEXT:    mv a1, zero
+; RV32I-NEXT:    beqz a5, .LBB7_13
+; RV32I-NEXT:  .LBB7_12:
+; RV32I-NEXT:    or t1, a0, a1
+; RV32I-NEXT:  .LBB7_13:
+; RV32I-NEXT:    mv a0, a6
+; RV32I-NEXT:    mv a1, t1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshr_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi t1, a4, 63
+; RV32IB-NEXT:    addi a6, t1, -32
+; RV32IB-NEXT:    addi a7, zero, 31
+; RV32IB-NEXT:    bltz a6, .LBB7_2
+; RV32IB-NEXT:  # %bb.1:
+; RV32IB-NEXT:    srl t0, a3, a6
+; RV32IB-NEXT:    j .LBB7_3
+; RV32IB-NEXT:  .LBB7_2:
+; RV32IB-NEXT:    srl t0, a2, a4
+; RV32IB-NEXT:    sub t2, a7, t1
+; RV32IB-NEXT:    slli a5, a3, 1
+; RV32IB-NEXT:    sll a5, a5, t2
+; RV32IB-NEXT:    or t0, t0, a5
+; RV32IB-NEXT:  .LBB7_3:
+; RV32IB-NEXT:    addi a5, zero, 32
+; RV32IB-NEXT:    sub t4, a5, t1
+; RV32IB-NEXT:    addi a5, zero, 64
+; RV32IB-NEXT:    sub t2, a5, t1
+; RV32IB-NEXT:    bltz t4, .LBB7_7
+; RV32IB-NEXT:  # %bb.4:
+; RV32IB-NEXT:    mv t3, zero
+; RV32IB-NEXT:    or t0, t3, t0
+; RV32IB-NEXT:    bgez t4, .LBB7_8
+; RV32IB-NEXT:  .LBB7_5:
+; RV32IB-NEXT:    sll a1, a1, t2
+; RV32IB-NEXT:    sub a5, a7, t2
+; RV32IB-NEXT:    srli a0, a0, 1
+; RV32IB-NEXT:    srl a0, a0, a5
+; RV32IB-NEXT:    or a1, a1, a0
+; RV32IB-NEXT:    cmov a0, t1, t0, a2
+; RV32IB-NEXT:    bgez a6, .LBB7_9
+; RV32IB-NEXT:  .LBB7_6:
+; RV32IB-NEXT:    srl a2, a3, a4
+; RV32IB-NEXT:    j .LBB7_10
+; RV32IB-NEXT:  .LBB7_7:
+; RV32IB-NEXT:    sll t3, a0, t2
+; RV32IB-NEXT:    or t0, t3, t0
+; RV32IB-NEXT:    bltz t4, .LBB7_5
+; RV32IB-NEXT:  .LBB7_8:
+; RV32IB-NEXT:    sll a1, a0, t4
+; RV32IB-NEXT:    cmov a0, t1, t0, a2
+; RV32IB-NEXT:    bltz a6, .LBB7_6
+; RV32IB-NEXT:  .LBB7_9:
+; RV32IB-NEXT:    mv a2, zero
+; RV32IB-NEXT:  .LBB7_10:
+; RV32IB-NEXT:    or a1, a1, a2
+; RV32IB-NEXT:    cmov a1, t1, a1, a3
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshr_i64:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    andi t1, a4, 63
+; RV32IBT-NEXT:    addi a6, t1, -32
+; RV32IBT-NEXT:    addi a7, zero, 31
+; RV32IBT-NEXT:    bltz a6, .LBB7_2
+; RV32IBT-NEXT:  # %bb.1:
+; RV32IBT-NEXT:    srl t0, a3, a6
+; RV32IBT-NEXT:    j .LBB7_3
+; RV32IBT-NEXT:  .LBB7_2:
+; RV32IBT-NEXT:    srl t0, a2, a4
+; RV32IBT-NEXT:    sub t2, a7, t1
+; RV32IBT-NEXT:    slli a5, a3, 1
+; RV32IBT-NEXT:    sll a5, a5, t2
+; RV32IBT-NEXT:    or t0, t0, a5
+; RV32IBT-NEXT:  .LBB7_3:
+; RV32IBT-NEXT:    addi a5, zero, 32
+; RV32IBT-NEXT:    sub t4, a5, t1
+; RV32IBT-NEXT:    addi a5, zero, 64
+; RV32IBT-NEXT:    sub t2, a5, t1
+; RV32IBT-NEXT:    bltz t4, .LBB7_7
+; RV32IBT-NEXT:  # %bb.4:
+; RV32IBT-NEXT:    mv t3, zero
+; RV32IBT-NEXT:    or t0, t3, t0
+; RV32IBT-NEXT:    bgez t4, .LBB7_8
+; RV32IBT-NEXT:  .LBB7_5:
+; RV32IBT-NEXT:    sll a1, a1, t2
+; RV32IBT-NEXT:    sub a5, a7, t2
+; RV32IBT-NEXT:    srli a0, a0, 1
+; RV32IBT-NEXT:    srl a0, a0, a5
+; RV32IBT-NEXT:    or a1, a1, a0
+; RV32IBT-NEXT:    cmov a0, t1, t0, a2
+; RV32IBT-NEXT:    bgez a6, .LBB7_9
+; RV32IBT-NEXT:  .LBB7_6:
+; RV32IBT-NEXT:    srl a2, a3, a4
+; RV32IBT-NEXT:    j .LBB7_10
+; RV32IBT-NEXT:  .LBB7_7:
+; RV32IBT-NEXT:    sll t3, a0, t2
+; RV32IBT-NEXT:    or t0, t3, t0
+; RV32IBT-NEXT:    bltz t4, .LBB7_5
+; RV32IBT-NEXT:  .LBB7_8:
+; RV32IBT-NEXT:    sll a1, a0, t4
+; RV32IBT-NEXT:    cmov a0, t1, t0, a2
+; RV32IBT-NEXT:    bltz a6, .LBB7_6
+; RV32IBT-NEXT:  .LBB7_9:
+; RV32IBT-NEXT:    mv a2, zero
+; RV32IBT-NEXT:  .LBB7_10:
+; RV32IBT-NEXT:    or a1, a1, a2
+; RV32IBT-NEXT:    cmov a1, t1, a1, a3
+; RV32IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+define i32 @fshri_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: fshri_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a1, 5
+; RV32I-NEXT:    slli a0, a0, 27
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshri_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    fsri a0, a0, a1, 5
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshri_i32:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    fsri a0, a0, a1, 5
+; RV32IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5)
+  ret i32 %1
+}
+
+define i64 @fshri_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: fshri_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a3, 27
+; RV32I-NEXT:    srli a2, a2, 5
+; RV32I-NEXT:    or a2, a2, a1
+; RV32I-NEXT:    srli a1, a3, 5
+; RV32I-NEXT:    slli a0, a0, 27
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: fshri_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    addi a1, zero, 27
+; RV32IB-NEXT:    fsl a2, a3, a1, a2
+; RV32IB-NEXT:    fsl a1, a0, a1, a3
+; RV32IB-NEXT:    mv a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBT-LABEL: fshri_i64:
+; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    addi a1, zero, 27
+; RV32IBT-NEXT:    fsl a2, a3, a1, a2
+; RV32IBT-NEXT:    fsl a1, a0, a1, a3
+; RV32IBT-NEXT:    mv a0, a2
+; RV32IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5)
+  ret i64 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
new file mode 100644
index 000000000000..22e25fadbd91
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IBT
+
+define signext i32 @cmix_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
+; RV64I-LABEL: cmix_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: cmix_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    cmix a0, a1, a0, a2
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: cmix_i32:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    cmix a0, a1, a0, a2
+; RV64IBT-NEXT:    ret
+  %and = and i32 %b, %a
+  %neg = xor i32 %b, -1
+  %and1 = and i32 %neg, %c
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64I-LABEL: cmix_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: cmix_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    cmix a0, a1, a0, a2
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: cmix_i64:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    cmix a0, a1, a0, a2
+; RV64IBT-NEXT:    ret
+  %and = and i64 %b, %a
+  %neg = xor i64 %b, -1
+  %and1 = and i64 %neg, %c
+  %or = or i64 %and1, %and
+  ret i64 %or
+}
+
+define signext i32 @cmov_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
+; RV64I-LABEL: cmov_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a1, .LBB2_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:  .LBB2_2:
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: cmov_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    cmov a0, a1, a0, a2
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: cmov_i32:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    cmov a0, a1, a0, a2
+; RV64IBT-NEXT:    ret
+  %tobool.not = icmp eq i32 %b, 0
+  %cond = select i1 %tobool.not, i32 %c, i32 %a
+  ret i32 %cond
+}
+
+define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64I-LABEL: cmov_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a1, .LBB3_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:  .LBB3_2:
+; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: cmov_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    cmov a0, a1, a0, a2
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: cmov_i64:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    cmov a0, a1, a0, a2
+; RV64IBT-NEXT:    ret
+  %tobool.not = icmp eq i64 %b, 0
+  %cond = select i1 %tobool.not, i64 %c, i64 %a
+  ret i64 %cond
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
+; RV64I-LABEL: fshl_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a2, 31
+; RV64I-NEXT:    beqz a3, .LBB4_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    addi a4, zero, 32
+; RV64I-NEXT:    sub a2, a4, a2
+; RV64I-NEXT:    srlw a1, a1, a2
+; RV64I-NEXT:    sllw a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshl_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fslw a0, a0, a2, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshl_i32:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fslw a0, a0, a2, a1
+; RV64IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64I-LABEL: fshl_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a2, 63
+; RV64I-NEXT:    beqz a3, .LBB5_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    sll a0, a0, a2
+; RV64I-NEXT:    addi a2, zero, 64
+; RV64I-NEXT:    sub a2, a2, a3
+; RV64I-NEXT:    srl a1, a1, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshl_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsl a0, a0, a2, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshl_i64:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fsl a0, a0, a2, a1
+; RV64IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
+; RV64I-LABEL: fshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a2, 31
+; RV64I-NEXT:    beqz a3, .LBB6_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    srlw a1, a1, a3
+; RV64I-NEXT:    addi a3, zero, 32
+; RV64I-NEXT:    sub a2, a3, a2
+; RV64I-NEXT:    sllw a0, a0, a2
+; RV64I-NEXT:    or a1, a0, a1
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshr_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsrw a0, a0, a2, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshr_i32:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fsrw a0, a0, a2, a1
+; RV64IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64I-LABEL: fshr_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a2, 63
+; RV64I-NEXT:    beqz a3, .LBB7_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    srl a1, a1, a2
+; RV64I-NEXT:    addi a2, zero, 64
+; RV64I-NEXT:    sub a2, a2, a3
+; RV64I-NEXT:    sll a0, a0, a2
+; RV64I-NEXT:    or a1, a0, a1
+; RV64I-NEXT:  .LBB7_2:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshr_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsr a0, a0, a2, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshr_i64:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fsr a0, a0, a2, a1
+; RV64IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+define signext i32 @fshri_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: fshri_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 5
+; RV64I-NEXT:    slli a0, a0, 27
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshri_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsriw a0, a0, a1, 5
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshri_i32:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fsriw a0, a0, a1, 5
+; RV64IBT-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5)
+  ret i32 %1
+}
+
+define i64 @fshri_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: fshri_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 5
+; RV64I-NEXT:    slli a0, a0, 59
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: fshri_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    fsri a0, a0, a1, 5
+; RV64IB-NEXT:    ret
+;
+; RV64IBT-LABEL: fshri_i64:
+; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    fsri a0, a0, a1, 5
+; RV64IBT-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5)
+  ret i64 %1
+}