[clang] fc02eeb - [RISCV] Add vendor-defined XTheadBb (basic bit-manipulation) extension

Philipp Tomsich via cfe-commits cfe-commits at lists.llvm.org
Mon Feb 13 08:02:22 PST 2023


Author: Philipp Tomsich
Date: 2023-02-13T17:02:09+01:00
New Revision: fc02eeb24fc024aa05fc2d58b73b713dc5bfd166

URL: https://github.com/llvm/llvm-project/commit/fc02eeb24fc024aa05fc2d58b73b713dc5bfd166
DIFF: https://github.com/llvm/llvm-project/commit/fc02eeb24fc024aa05fc2d58b73b713dc5bfd166.diff

LOG: [RISCV] Add vendor-defined XTheadBb (basic bit-manipulation) extension

The vendor-defined XTHeadBb extension (predating the standard Zbb
extension) adds basic bit-manipulation instructions with semantics
somewhat similar to those of some of the Zbb instructions.

It is supported by Alibaba T-Head's C9xx cores (e.g., found in the
wild in the Allwinner D1).

The current (as of this commit) public documentation for XTHeadBb is
available from:
  https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf

Support for these instructions has already landed in GNU Binutils:
  https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=8254c3d2c94ae5458095ea6c25446ba89134b9da

Depends on D143036

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D143439

Added: 
    clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
    clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
    llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
    llvm/test/CodeGen/RISCV/rv64xtheadbb.ll

Modified: 
    clang/include/clang/Basic/BuiltinsRISCV.def
    llvm/docs/RISCVUsage.rst
    llvm/docs/ReleaseNotes.rst
    llvm/lib/Support/RISCVISAInfo.cpp
    llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
    llvm/lib/Target/RISCV/RISCVFeatures.td
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
    llvm/test/CodeGen/RISCV/attributes.ll
    llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
    llvm/test/CodeGen/RISCV/imm.ll
    llvm/test/CodeGen/RISCV/rotl-rotr.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def
index c26e3b8073703..3ca7654a32adc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.def
+++ b/clang/include/clang/Basic/BuiltinsRISCV.def
@@ -18,8 +18,8 @@
 // Zbb extension
 TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb")
 TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit")
-TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb")
-TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb,64bit")
+TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb|xtheadbb")
+TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb|xtheadbb,64bit")
 TARGET_BUILTIN(__builtin_riscv_ctz_32, "ZiZi", "nc", "zbb")
 TARGET_BUILTIN(__builtin_riscv_ctz_64, "WiWi", "nc", "zbb,64bit")
 

diff  --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
new file mode 100644
index 0000000000000..915dd806d2179
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
@@ -0,0 +1,28 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +xtheadbb -emit-llvm %s -o - \
+// RUN:     | FileCheck %s  -check-prefix=RV32XTHEADBB
+
+// RV32XTHEADBB-LABEL: @clz_32(
+// RV32XTHEADBB-NEXT:  entry:
+// RV32XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// RV32XTHEADBB-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+// RV32XTHEADBB-NEXT:    ret i32 [[TMP1]]
+//
+int clz_32(int a) {
+  return __builtin_riscv_clz_32(a);
+}
+
+// RV32XTHEADBB-LABEL: @clo_32(
+// RV32XTHEADBB-NEXT:  entry:
+// RV32XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// RV32XTHEADBB-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT:    [[NOT:%.*]] = xor i32 [[TMP0]], -1
+// RV32XTHEADBB-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false)
+// RV32XTHEADBB-NEXT:    ret i32 [[TMP1]]
+//
+int clo_32(int a) {
+  return __builtin_riscv_clz_32(~a);
+}

diff  --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
new file mode 100644
index 0000000000000..3b6ef569e6b85
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
@@ -0,0 +1,53 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadbb -emit-llvm %s -o - \
+// RUN:     | FileCheck %s  -check-prefix=RV64XTHEADBB
+
+// RV64XTHEADBB-LABEL: @clz_32(
+// RV64XTHEADBB-NEXT:  entry:
+// RV64XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// RV64XTHEADBB-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+// RV64XTHEADBB-NEXT:    ret i32 [[TMP1]]
+//
+int clz_32(int a) {
+  return __builtin_riscv_clz_32(a);
+}
+
+// RV64XTHEADBB-LABEL: @clo_32(
+// RV64XTHEADBB-NEXT:  entry:
+// RV64XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// RV64XTHEADBB-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT:    [[NOT:%.*]] = xor i32 [[TMP0]], -1
+// RV64XTHEADBB-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false)
+// RV64XTHEADBB-NEXT:    ret i32 [[TMP1]]
+//
+int clo_32(int a) {
+  return __builtin_riscv_clz_32(~a);
+}
+
+// RV64XTHEADBB-LABEL: @clz_64(
+// RV64XTHEADBB-NEXT:  entry:
+// RV64XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
+// RV64XTHEADBB-NEXT:    store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT:    [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP0]], i1 false)
+// RV64XTHEADBB-NEXT:    ret i64 [[TMP1]]
+//
+long clz_64(long a) {
+  return __builtin_riscv_clz_64(a);
+}
+
+// RV64XTHEADBB-LABEL: @clo_64(
+// RV64XTHEADBB-NEXT:  entry:
+// RV64XTHEADBB-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
+// RV64XTHEADBB-NEXT:    store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT:    [[NOT:%.*]] = xor i64 [[TMP0]], -1
+// RV64XTHEADBB-NEXT:    [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[NOT]], i1 false)
+// RV64XTHEADBB-NEXT:    ret i64 [[TMP1]]
+//
+long clo_64(long a) {
+  return __builtin_riscv_clz_64(~a);
+}

diff  --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index 266fb6cd3e9c3..d9e6fc45e037e 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -172,6 +172,9 @@ The current vendor extensions supported are:
 ``XTHeadBa``
   LLVM implements `the THeadBa (address-generation) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
 
+``XTHeadBb``
+  LLVM implements `the THeadBb (basic bit-manipulation) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
+
 ``XTHeadBs``
   LLVM implements `the THeadBs (single-bit operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
 

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 7ade6be2fb6ef..7e238dd052020 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -109,6 +109,7 @@ Changes to the RISC-V Backend
 * vsetvli intrinsics no longer have side effects. They may now be combined,
   moved, deleted, etc. by optimizations.
 * Adds support for the vendor-defined XTHeadBa (address-generation) extension.
+* Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension.
 * Adds support for the vendor-defined XTHeadBs (single-bit) extension.
 
 Changes to the WebAssembly Backend

diff  --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 92c15885b225a..70095d0836719 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -110,6 +110,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
 
     // vendor-defined ('X') extensions
     {"xtheadba", RISCVExtensionVersion{1, 0}},
+    {"xtheadbb", RISCVExtensionVersion{1, 0}},
     {"xtheadbs", RISCVExtensionVersion{1, 0}},
     {"xtheadvdot", RISCVExtensionVersion{1, 0}},
     {"xventanacondops", RISCVExtensionVersion{1, 0}},

diff  --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7276d8240520b..647607a6664da 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -478,6 +478,13 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       if (Result != MCDisassembler::Fail)
         return Result;
     }
+    if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBb]) {
+      LLVM_DEBUG(dbgs() << "Trying XTHeadBb custom opcode table:\n");
+      Result = decodeInstruction(DecoderTableTHeadBb32, MI, Insn, Address, this,
+                                 STI);
+      if (Result != MCDisassembler::Fail)
+        return Result;
+    }
     if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBs]) {
       LLVM_DEBUG(dbgs() << "Trying XTHeadBs custom opcode table:\n");
       Result = decodeInstruction(DecoderTableTHeadBs32, MI, Insn, Address, this,

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index bc2f6683392c7..57908723530de 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -352,15 +352,20 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     }
   }
 
-  // Perform optimization with rori in the Zbb extension.
-  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
+  // Perform optimization with rori in the Zbb and th.srri in the XTheadBb
+  // extension.
+  if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
+                         ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
     if (unsigned Rotate = extractRotateInfo(Val)) {
       RISCVMatInt::InstSeq TmpSeq;
       uint64_t NegImm12 =
           ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
       assert(isInt<12>(NegImm12));
       TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
-      TmpSeq.emplace_back(RISCV::RORI, Rotate);
+      TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
+                              ? RISCV::RORI
+                              : RISCV::TH_SRRI,
+                          Rotate);
       Res = TmpSeq;
     }
   }
@@ -405,6 +410,7 @@ OpndKind Inst::getOpndKind() const {
   case RISCV::RORI:
   case RISCV::BSETI:
   case RISCV::BCLRI:
+  case RISCV::TH_SRRI:
     return RISCVMatInt::RegImm;
   }
 }

diff  --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 838056f391c9a..e6cd3341198bf 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -470,6 +470,13 @@ def HasVendorXTHeadBa : Predicate<"Subtarget->hasVendorXTHeadBa()">,
                                   AssemblerPredicate<(all_of FeatureVendorXTHeadBa),
                                   "'xtheadba' (T-Head address calculation instructions)">;
 
+def FeatureVendorXTHeadBb
+    : SubtargetFeature<"xtheadbb", "HasVendorXTHeadBb", "true",
+                       "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+def HasVendorXTHeadBb : Predicate<"Subtarget->hasVendorXTHeadBb()">,
+                                  AssemblerPredicate<(all_of FeatureVendorXTHeadBb),
+                                  "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+
 def FeatureVendorXTHeadBs
     : SubtargetFeature<"xtheadbs", "HasVendorXTHeadBs", "true",
                        "'xtheadbs' (T-Head single-bit instructions)">;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index a7f6275dee5ff..e156f4dc82e56 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1104,11 +1104,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     bool IsANDIOrZExt =
         isInt<12>(C2) ||
         (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
+    // With XTHeadBb, we can use TH.EXTU.
+    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
     if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
       break;
     // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
     // the constant is a simm32.
     bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
+    // With XTHeadBb, we can use TH.EXTU.
+    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
     if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
       break;
 
@@ -2386,6 +2390,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
     case RISCV::FCVT_S_WU:
     case RISCV::FCVT_D_W:
     case RISCV::FCVT_D_WU:
+    case RISCV::TH_REVW:
+    case RISCV::TH_SRRIW:
       if (Bits < 32)
         return false;
       break;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c6ec478cceaf3..3c6404e146661 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -233,7 +233,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
 
-  if (!Subtarget.hasStdExtZbb())
+  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
     setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
 
   if (Subtarget.is64Bit()) {
@@ -280,7 +280,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                      Custom);
 
-  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+      Subtarget.hasVendorXTHeadBb()) {
     if (Subtarget.is64Bit())
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
   } else {
@@ -290,7 +291,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
   // pattern match it directly in isel.
   setOperationAction(ISD::BSWAP, XLenVT,
-                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
+                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+                      Subtarget.hasVendorXTHeadBb())
                          ? Legal
                          : Expand);
   // Zbkb can use rev8+brev8 to implement bitreverse.
@@ -309,6 +311,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
   }
 
+  if (Subtarget.hasVendorXTHeadBb()) {
+    setOperationAction({ISD::CTLZ}, XLenVT, Legal);
+
+    // We need the custom lowering to make sure that the resulting sequence
+    // for the 32bit case is efficient on 64bit targets.
+    if (Subtarget.is64Bit())
+      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+  }
+
   if (Subtarget.is64Bit())
     setOperationAction(ISD::ABS, MVT::i32, Custom);
 
@@ -1212,7 +1223,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
 }
 
 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 87704ce9c27cc..f85af115f8770 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -54,6 +54,38 @@ class THShiftALU_rri<bits<3> funct3, string opcodestr>
   let Inst{26-25} = uimm2;
 }
 
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
+  hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
+    : RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+                   (ins GPR:$rs1, uimmlog2xlen:$shamt),
+		   opcodestr, "$rd, $rs1, $shamt">;
+
+class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
+    : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+              (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
+	      opcodestr, "$rd, $rs1, $msb, $lsb"> {
+  bits<6> msb;
+  bits<6> lsb;
+  let Inst{31-26} = msb;
+  let Inst{25-20} = lsb;
+}
+
+class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
+    : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
+               opcodestr, "$rd, $rs1"> {
+  let rs3 = funct5;
+  let rs2 = 0;
+}
+}
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
+  hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
+    : RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+                    (ins GPR:$rs1, uimm5:$shamt),
+		    opcodestr, "$rd, $rs1, $shamt">;
+
 //===----------------------------------------------------------------------===//
 // Combination of instruction classes.
 // Use these multiclasses to define instructions more easily.
@@ -75,6 +107,21 @@ def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
                Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
 } // Predicates = [HasVendorXTHeadBa]
 
+let Predicates = [HasVendorXTHeadBb] in {
+def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
+def TH_EXT : THBitfieldExtract_rii<0b010, "th.ext">;
+def TH_EXTU : THBitfieldExtract_rii<0b011, "th.extu">;
+def TH_FF0 : THRev_r<0b10000, 0b10, "th.ff0">;
+def TH_FF1 : THRev_r<0b10000, 0b11, "th.ff1">;
+def TH_REV : THRev_r<0b10000, 0b01, "th.rev">;
+def TH_TSTNBZ : THRev_r<0b10000, 0b00, "th.tstnbz">;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
+def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
 let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
 let IsSignExtendingOpW = 1 in
 def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
@@ -230,6 +277,49 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
 		       (TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
 } // Predicates = [HasVendorXTHeadBa]
 
+let Predicates = [HasVendorXTHeadBb] in {
+def : PatGprImm<rotr, TH_SRRI, uimmlog2xlen>;
+// There's no encoding for a rotate-left-immediate in X-THead-Bb, as
+// it can be implemented with th.srri by negating the immediate.
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+          (TH_SRRI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2),
+           (OR (SRL GPR:$rs1, GPR:$rs2),
+               (SLL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+def : Pat<(rotl GPR:$rs1, GPR:$rs2),
+           (OR (SLL GPR:$rs1, GPR:$rs2),
+               (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
+//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
+def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i1), (TH_EXT GPR:$rs1, 0, 0)>;
+def : PatGpr<ctlz, TH_FF1>;
+def : Pat<(ctlz (xor GPR:$rs1, -1)), (TH_FF0 GPR:$rs1)>;
+def : PatGpr<bswap, TH_REV>;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64] in {
+def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+          (TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+def : Pat<(riscv_rorw i64:$rs1, i64:$rs2),
+          (OR (SRLW i64:$rs1, i64:$rs2),
+              (SLLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(riscv_rolw i64:$rs1, i64:$rs2),
+          (OR (SLLW i64:$rs1, i64:$rs2),
+              (SRLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(sra (bswap i64:$rs1), (i64 32)),
+          (TH_REVW i64:$rs1)>;
+def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
+          (TH_REVW i64:$rs1)>;
+def : Pat<(riscv_clzw i64:$rs1),
+          (TH_FF0 (SLLI (XORI i64:$rs1, -1), 32))>;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
 let Predicates = [HasVendorXTHeadBs] in {
 def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), 1),
           (TH_TST GPR:$rs1, uimmlog2xlen:$shamt)>;

diff  --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index 30fd52d2bb508..68638ffa75d59 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -89,6 +89,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV64SVINVAL %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefixes=CHECK,RV64XVENTANACONDOPS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBA %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
@@ -187,6 +188,7 @@
 ; RV64SVINVAL: .attribute 5, "rv64i2p0_svinval1p0"
 ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p0_xventanacondops1p0"
 ; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0"
+; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
 ; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
 ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
 ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"

diff  --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index e7a1ea0e1bdff..f251c9808f05c 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -11,6 +11,10 @@
 ; RUN:   | FileCheck %s -check-prefix=RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
 
 declare i8 @llvm.cttz.i8(i8, i1)
 declare i16 @llvm.cttz.i16(i16, i1)
@@ -83,6 +87,38 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV64ZBB-NEXT:    ori a0, a0, 256
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i8:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a1, a0, 255
+; RV32XTHEADBB-NEXT:    beqz a1, .LBB0_2
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB0_2:
+; RV32XTHEADBB-NEXT:    li a0, 8
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i8:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a1, a0, 255
+; RV64XTHEADBB-NEXT:    beqz a1, .LBB0_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB0_2:
+; RV64XTHEADBB-NEXT:    li a0, 8
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
   ret i8 %tmp
 }
@@ -161,6 +197,38 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    or a0, a0, a1
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i16:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    slli a1, a0, 16
+; RV32XTHEADBB-NEXT:    beqz a1, .LBB1_2
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB1_2:
+; RV32XTHEADBB-NEXT:    li a0, 16
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i16:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a1, a0, 48
+; RV64XTHEADBB-NEXT:    beqz a1, .LBB1_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB1_2:
+; RV64XTHEADBB-NEXT:    li a0, 16
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
   ret i16 %tmp
 }
@@ -261,6 +329,37 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctzw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    beqz a0, .LBB2_2
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB2_2:
+; RV32XTHEADBB-NEXT:    li a0, 32
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sext.w a1, a0
+; RV64XTHEADBB-NEXT:    beqz a1, .LBB2_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB2_2:
+; RV64XTHEADBB-NEXT:    li a0, 32
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
   ret i32 %tmp
 }
@@ -408,6 +507,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    addi a0, a1, -1
+; RV32XTHEADBB-NEXT:    not a1, a1
+; RV32XTHEADBB-NEXT:    and a0, a1, a0
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 64
+; RV32XTHEADBB-NEXT:    j .LBB3_3
+; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB3_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB3_2:
+; RV64XTHEADBB-NEXT:    li a0, 64
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
   ret i64 %tmp
 }
@@ -456,6 +591,26 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i8_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i8_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true)
   ret i8 %tmp
 }
@@ -518,6 +673,26 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i16_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i16_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
   ret i16 %tmp
 }
@@ -596,6 +771,26 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctzw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i32_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i32_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true)
   ret i32 %tmp
 }
@@ -723,6 +918,37 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ctz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i64_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB7_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    addi a0, a1, -1
+; RV32XTHEADBB-NEXT:    not a1, a1
+; RV32XTHEADBB-NEXT:    and a0, a1, a0
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 64
+; RV32XTHEADBB-NEXT:    j .LBB7_3
+; RV32XTHEADBB-NEXT:  .LBB7_2:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:  .LBB7_3:
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i64_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true)
   ret i64 %tmp
 }
@@ -801,6 +1027,20 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    addi a0, a0, -56
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i8:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a0, a0, 255
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, -24
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i8:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a0, a0, 255
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -56
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
   ret i8 %tmp
 }
@@ -897,6 +1137,20 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    addi a0, a0, -48
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i16:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, -16
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i16:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -48
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
   ret i16 %tmp
 }
@@ -1081,6 +1335,18 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    clzw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
   ret i32 %tmp
 }
@@ -1344,6 +1610,24 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a1, .LBB11_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, 32
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB11_2:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a1
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
   ret i64 %tmp
 }
@@ -1410,6 +1694,20 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind {
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    addi a0, a0, -56
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a0, a0, 255
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, -24
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a0, a0, 255
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -56
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
   ret i8 %tmp
 }
@@ -1496,6 +1794,20 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    addi a0, a0, -48
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, -16
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -48
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
   ret i16 %tmp
 }
@@ -1658,6 +1970,18 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    clzw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i32_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i32_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
   ret i32 %tmp
 }
@@ -1911,6 +2235,24 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    clz a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i64_zero_undef:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a1, .LBB15_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, 32
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB15_2:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a1
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i64_zero_undef:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    ret
   %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
   ret i64 %tmp
 }
@@ -1955,6 +2297,34 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
 ; RV64ZBB-NEXT:    andi a0, a0, 255
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i8:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    andi a1, a1, 85
+; RV32XTHEADBB-NEXT:    sub a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a1, a0, 51
+; RV32XTHEADBB-NEXT:    srli a0, a0, 2
+; RV32XTHEADBB-NEXT:    andi a0, a0, 51
+; RV32XTHEADBB-NEXT:    add a0, a1, a0
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a0, a0, 15
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i8:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    andi a1, a1, 85
+; RV64XTHEADBB-NEXT:    subw a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a1, a0, 51
+; RV64XTHEADBB-NEXT:    srli a0, a0, 2
+; RV64XTHEADBB-NEXT:    andi a0, a0, 51
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a0, a0, 15
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i8 @llvm.ctpop.i8(i8 %a)
   ret i8 %1
 }
@@ -2013,6 +2383,48 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i16:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    lui a2, 5
+; RV32XTHEADBB-NEXT:    addi a2, a2, 1365
+; RV32XTHEADBB-NEXT:    and a1, a1, a2
+; RV32XTHEADBB-NEXT:    sub a0, a0, a1
+; RV32XTHEADBB-NEXT:    lui a1, 3
+; RV32XTHEADBB-NEXT:    addi a1, a1, 819
+; RV32XTHEADBB-NEXT:    and a2, a0, a1
+; RV32XTHEADBB-NEXT:    srli a0, a0, 2
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    add a0, a2, a0
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a1, a0, 15
+; RV32XTHEADBB-NEXT:    slli a0, a0, 20
+; RV32XTHEADBB-NEXT:    srli a0, a0, 28
+; RV32XTHEADBB-NEXT:    add a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i16:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    lui a2, 5
+; RV64XTHEADBB-NEXT:    addiw a2, a2, 1365
+; RV64XTHEADBB-NEXT:    and a1, a1, a2
+; RV64XTHEADBB-NEXT:    sub a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 3
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 819
+; RV64XTHEADBB-NEXT:    and a2, a0, a1
+; RV64XTHEADBB-NEXT:    srli a0, a0, 2
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    add a0, a2, a0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a1, a0, 15
+; RV64XTHEADBB-NEXT:    slli a0, a0, 52
+; RV64XTHEADBB-NEXT:    srli a0, a0, 60
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i16 @llvm.ctpop.i16(i16 %a)
   ret i16 %1
 }
@@ -2131,6 +2543,62 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi sp, sp, -16
+; RV32XTHEADBB-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    lui a2, 349525
+; RV32XTHEADBB-NEXT:    addi a2, a2, 1365
+; RV32XTHEADBB-NEXT:    and a1, a1, a2
+; RV32XTHEADBB-NEXT:    sub a0, a0, a1
+; RV32XTHEADBB-NEXT:    lui a1, 209715
+; RV32XTHEADBB-NEXT:    addi a1, a1, 819
+; RV32XTHEADBB-NEXT:    and a2, a0, a1
+; RV32XTHEADBB-NEXT:    srli a0, a0, 2
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    add a0, a2, a0
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    lui a1, 61681
+; RV32XTHEADBB-NEXT:    addi a1, a1, -241
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    lui a1, 4112
+; RV32XTHEADBB-NEXT:    addi a1, a1, 257
+; RV32XTHEADBB-NEXT:    call __mulsi3@plt
+; RV32XTHEADBB-NEXT:    srli a0, a0, 24
+; RV32XTHEADBB-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    addi sp, sp, 16
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    lui a2, 349525
+; RV64XTHEADBB-NEXT:    addiw a2, a2, 1365
+; RV64XTHEADBB-NEXT:    and a1, a1, a2
+; RV64XTHEADBB-NEXT:    sub a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 209715
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 819
+; RV64XTHEADBB-NEXT:    and a2, a0, a1
+; RV64XTHEADBB-NEXT:    srli a0, a0, 2
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    add a0, a2, a0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 61681
+; RV64XTHEADBB-NEXT:    addiw a1, a1, -241
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 4112
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 257
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 24
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %1
 }
@@ -2295,6 +2763,91 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    cpop a0, a0
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi sp, sp, -32
+; RV32XTHEADBB-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT:    mv s0, a0
+; RV32XTHEADBB-NEXT:    srli a0, a1, 1
+; RV32XTHEADBB-NEXT:    lui a2, 349525
+; RV32XTHEADBB-NEXT:    addi s2, a2, 1365
+; RV32XTHEADBB-NEXT:    and a0, a0, s2
+; RV32XTHEADBB-NEXT:    sub a1, a1, a0
+; RV32XTHEADBB-NEXT:    lui a0, 209715
+; RV32XTHEADBB-NEXT:    addi s3, a0, 819
+; RV32XTHEADBB-NEXT:    and a0, a1, s3
+; RV32XTHEADBB-NEXT:    srli a1, a1, 2
+; RV32XTHEADBB-NEXT:    and a1, a1, s3
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    lui a1, 61681
+; RV32XTHEADBB-NEXT:    addi s4, a1, -241
+; RV32XTHEADBB-NEXT:    and a0, a0, s4
+; RV32XTHEADBB-NEXT:    lui a1, 4112
+; RV32XTHEADBB-NEXT:    addi s1, a1, 257
+; RV32XTHEADBB-NEXT:    mv a1, s1
+; RV32XTHEADBB-NEXT:    call __mulsi3@plt
+; RV32XTHEADBB-NEXT:    srli s5, a0, 24
+; RV32XTHEADBB-NEXT:    srli a0, s0, 1
+; RV32XTHEADBB-NEXT:    and a0, a0, s2
+; RV32XTHEADBB-NEXT:    sub s0, s0, a0
+; RV32XTHEADBB-NEXT:    and a0, s0, s3
+; RV32XTHEADBB-NEXT:    srli s0, s0, 2
+; RV32XTHEADBB-NEXT:    and a1, s0, s3
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    and a0, a0, s4
+; RV32XTHEADBB-NEXT:    mv a1, s1
+; RV32XTHEADBB-NEXT:    call __mulsi3@plt
+; RV32XTHEADBB-NEXT:    srli a0, a0, 24
+; RV32XTHEADBB-NEXT:    add a0, a0, s5
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT:    addi sp, sp, 32
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI19_0)
+; RV64XTHEADBB-NEXT:    ld a1, %lo(.LCPI19_0)(a1)
+; RV64XTHEADBB-NEXT:    lui a2, %hi(.LCPI19_1)
+; RV64XTHEADBB-NEXT:    ld a2, %lo(.LCPI19_1)(a2)
+; RV64XTHEADBB-NEXT:    srli a3, a0, 1
+; RV64XTHEADBB-NEXT:    and a1, a3, a1
+; RV64XTHEADBB-NEXT:    sub a0, a0, a1
+; RV64XTHEADBB-NEXT:    and a1, a0, a2
+; RV64XTHEADBB-NEXT:    srli a0, a0, 2
+; RV64XTHEADBB-NEXT:    and a0, a0, a2
+; RV64XTHEADBB-NEXT:    lui a2, %hi(.LCPI19_2)
+; RV64XTHEADBB-NEXT:    ld a2, %lo(.LCPI19_2)(a2)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    and a0, a0, a2
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI19_3)
+; RV64XTHEADBB-NEXT:    ld a1, %lo(.LCPI19_3)(a1)
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srli a0, a0, 56
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %1
 }
@@ -2337,6 +2890,30 @@ define i8 @test_parity_i8(i8 %a) {
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    andi a0, a0, 1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i8:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a0, a0, 255
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 2
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a0, a0, 1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i8:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a0, a0, 255
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 2
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a0, a0, 1
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i8 @llvm.ctpop.i8(i8 %a)
   %2 = and i8 %1, 1
   ret i8 %2
@@ -2386,6 +2963,34 @@ define i16 @test_parity_i16(i16 %a) {
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    andi a0, a0, 1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i16:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    srli a1, a0, 8
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 2
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a0, a0, 1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i16:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 8
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 2
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a0, a0, 1
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i16 @llvm.ctpop.i16(i16 %a)
   %2 = and i16 %1, 1
   ret i16 %2
@@ -2435,6 +3040,37 @@ define i32 @test_parity_i32(i32 %a) {
 ; RV64ZBB-NEXT:    cpopw a0, a0
 ; RV64ZBB-NEXT:    andi a0, a0, 1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srli a1, a0, 16
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 8
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 2
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a0, a0, 1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
+; RV64XTHEADBB-NEXT:    xor a0, a1, a0
+; RV64XTHEADBB-NEXT:    srli a1, a0, 8
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 2
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a0, a0, 1
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   %2 = and i32 %1, 1
   ret i32 %2
@@ -2488,6 +3124,40 @@ define i64 @test_parity_i64(i64 %a) {
 ; RV64ZBB-NEXT:    cpop a0, a0
 ; RV64ZBB-NEXT:    andi a0, a0, 1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 16
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 8
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 4
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 2
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    srli a1, a0, 1
+; RV32XTHEADBB-NEXT:    xor a0, a0, a1
+; RV32XTHEADBB-NEXT:    andi a0, a0, 1
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srli a1, a0, 32
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 16
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 8
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 4
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 2
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    srli a1, a0, 1
+; RV64XTHEADBB-NEXT:    xor a0, a0, a1
+; RV64XTHEADBB-NEXT:    andi a0, a0, 1
+; RV64XTHEADBB-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   %2 = and i64 %1, 1
   ret i64 %2

diff  --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 15ea6cbd33471..45366798bf9af 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -9,6 +9,8 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBB
 ; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbs \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB
 
 ; Materializing constants
 
@@ -41,6 +43,11 @@ define signext i32 @zero() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    li a0, 0
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: zero:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 0
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 0
 }
 
@@ -69,6 +76,11 @@ define signext i32 @pos_small() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    li a0, 2047
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 2047
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 2047
 }
 
@@ -97,6 +109,11 @@ define signext i32 @neg_small() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    li a0, -2048
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2048
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 -2048
 }
 
@@ -130,6 +147,12 @@ define signext i32 @pos_i32() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 423811
 ; RV64IZBS-NEXT:    addiw a0, a0, -1297
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 423811
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1297
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 1735928559
 }
 
@@ -163,6 +186,12 @@ define signext i32 @neg_i32() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 912092
 ; RV64IZBS-NEXT:    addiw a0, a0, -273
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 912092
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 -559038737
 }
 
@@ -191,6 +220,11 @@ define signext i32 @pos_i32_hi20_only() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    lui a0, 16
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 16
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 65536 ; 0x10000
 }
 
@@ -219,6 +253,11 @@ define signext i32 @neg_i32_hi20_only() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    lui a0, 1048560
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048560
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 -65536 ; -0x10000
 }
 
@@ -254,6 +293,12 @@ define signext i32 @imm_left_shifted_addi() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 32
 ; RV64IZBS-NEXT:    addiw a0, a0, -64
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 32
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -64
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 131008 ; 0x1FFC0
 }
 
@@ -289,6 +334,12 @@ define signext i32 @imm_right_shifted_addi() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 524288
 ; RV64IZBS-NEXT:    addiw a0, a0, -1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 2147483647 ; 0x7FFFFFFF
 }
 
@@ -324,6 +375,12 @@ define signext i32 @imm_right_shifted_lui() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 56
 ; RV64IZBS-NEXT:    addiw a0, a0, 580
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 56
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 580
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 229956 ; 0x38244
 }
 
@@ -356,6 +413,12 @@ define i64 @imm64_1() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    bseti a0, zero, 31
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 2147483648 ; 0x8000_0000
 }
 
@@ -389,6 +452,12 @@ define i64 @imm64_2() nounwind {
 ; RV64IZBS-NEXT:    li a0, -1
 ; RV64IZBS-NEXT:    srli a0, a0, 32
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 4294967295 ; 0xFFFF_FFFF
 }
 
@@ -421,6 +490,12 @@ define i64 @imm64_3() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    bseti a0, zero, 32
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 4294967296 ; 0x1_0000_0000
 }
 
@@ -453,6 +528,12 @@ define i64 @imm64_4() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    bseti a0, zero, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_4:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 9223372036854775808 ; 0x8000_0000_0000_0000
 }
 
@@ -485,6 +566,12 @@ define i64 @imm64_5() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    bseti a0, zero, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_5:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000
 }
 
@@ -523,6 +610,13 @@ define i64 @imm64_6() nounwind {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1329
 ; RV64IZBS-NEXT:    slli a0, a0, 35
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_6:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 9321
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1329
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
 }
 
@@ -569,6 +663,15 @@ define i64 @imm64_7() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 24
 ; RV64IZBS-NEXT:    addi a0, a0, 15
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_7:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 7
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 11
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 15
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F
 }
 
@@ -629,6 +732,18 @@ define i64 @imm64_8() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 13
 ; RV64IZBS-NEXT:    addi a0, a0, -272
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_8:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 583
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1875
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -947
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1511
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -272
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0
 }
 
@@ -658,6 +773,11 @@ define i64 @imm64_9() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    li a0, -1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_9:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -1
 }
 
@@ -694,6 +814,12 @@ define i64 @imm_left_shifted_lui_1() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 262145
 ; RV64IZBS-NEXT:    slli a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 2147491840 ; 0x8000_2000
 }
 
@@ -727,6 +853,12 @@ define i64 @imm_left_shifted_lui_2() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 262145
 ; RV64IZBS-NEXT:    slli a0, a0, 2
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 4294983680 ; 0x1_0000_4000
 }
 
@@ -761,6 +893,12 @@ define i64 @imm_left_shifted_lui_3() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 4097
 ; RV64IZBS-NEXT:    slli a0, a0, 20
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 17596481011712 ; 0x1001_0000_0000
 }
 
@@ -799,6 +937,12 @@ define i64 @imm_right_shifted_lui_1() nounwind {
 ; RV64IZBS-NEXT:    lui a0, 983056
 ; RV64IZBS-NEXT:    srli a0, a0, 16
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 983056
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 281474976706561 ; 0xFFFF_FFFF_F001
 }
 
@@ -837,6 +981,13 @@ define i64 @imm_right_shifted_lui_2() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    srli a0, a0, 24
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1044481
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 1099511623681 ; 0xFF_FFFF_F001
 }
 
@@ -877,6 +1028,13 @@ define i64 @imm_decoupled_lui_addi() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 20
 ; RV64IZBS-NEXT:    addi a0, a0, -3
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_decoupled_lui_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -3
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 17596481011709 ; 0x1000_FFFF_FFFD
 }
 
@@ -925,6 +1083,15 @@ define i64 @imm_end_xori_1() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 25
 ; RV64IZBS-NEXT:    addi a0, a0, -1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_xori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 25
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
 }
 
@@ -974,6 +1141,15 @@ define i64 @imm_end_2addi_1() nounwind {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 2047
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_2addi_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2047
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 27
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 2047
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
 }
 
@@ -1030,6 +1206,17 @@ define i64 @imm_2reg_1() nounwind {
 ; RV64IZBS-NEXT:    bseti a0, a0, 62
 ; RV64IZBS-NEXT:    bseti a0, a0, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2reg_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 9
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 837
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
 
@@ -1064,6 +1251,12 @@ define void @imm_store_i16_neg1(ptr %p) nounwind {
 ; RV64IZBS-NEXT:    li a1, -1
 ; RV64IZBS-NEXT:    sh a1, 0(a0)
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i16_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sh a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
   store i16 -1, ptr %p
   ret void
 }
@@ -1099,6 +1292,12 @@ define void @imm_store_i32_neg1(ptr %p) nounwind {
 ; RV64IZBS-NEXT:    li a1, -1
 ; RV64IZBS-NEXT:    sw a1, 0(a0)
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i32_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sw a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
   store i32 -1, ptr %p
   ret void
 }
@@ -1140,6 +1339,14 @@ define i64 @imm_5372288229() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -795
 ; RV64IZBS-NEXT:    bseti a0, a0, 32
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 160
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -795
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 5372288229
 }
 
@@ -1180,6 +1387,14 @@ define i64 @imm_neg_5372288229() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 795
 ; RV64IZBS-NEXT:    bclri a0, a0, 32
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048416
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 795
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -5372288229
 }
 
@@ -1220,6 +1435,14 @@ define i64 @imm_8953813715() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1325
 ; RV64IZBS-NEXT:    bseti a0, a0, 33
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 267
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1325
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 8953813715
 }
 
@@ -1260,6 +1483,14 @@ define i64 @imm_neg_8953813715() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 1325
 ; RV64IZBS-NEXT:    bclri a0, a0, 33
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048309
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1325
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -8953813715
 }
 
@@ -1301,6 +1532,14 @@ define i64 @imm_16116864687() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 1711
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 961
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1711
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 16116864687
 }
 
@@ -1342,6 +1581,14 @@ define i64 @imm_neg_16116864687() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, -1711
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047615
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1711
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -16116864687
 }
 
@@ -1380,6 +1627,13 @@ define i64 @imm_2344336315() {
 ; RV64IZBS-NEXT:    slli a0, a0, 2
 ; RV64IZBS-NEXT:    addi a0, a0, -1093
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2344336315:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 143087
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 2344336315 ; 0x8bbbbbbb
 }
 
@@ -1427,6 +1681,16 @@ define i64 @imm_70370820078523() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1093
 ; RV64IZBS-NEXT:    bseti a0, a0, 46
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_70370820078523:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 256
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 70370820078523 ; 0x40007bbbbbbb
 }
 
@@ -1477,6 +1741,17 @@ define i64 @imm_neg_9223372034778874949() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1093
 ; RV64IZBS-NEXT:    bseti a0, a0, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 37
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
 }
 
@@ -1528,6 +1803,17 @@ define i64 @imm_neg_9223301666034697285() {
 ; RV64IZBS-NEXT:    bseti a0, a0, 46
 ; RV64IZBS-NEXT:    bseti a0, a0, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223301666034697285:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917505
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 8
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
 }
 
@@ -1566,6 +1852,13 @@ define i64 @imm_neg_2219066437() {
 ; RV64IZBS-NEXT:    slli a0, a0, 2
 ; RV64IZBS-NEXT:    addi a0, a0, -1093
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2219066437:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 913135
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2219066437 ; 0xffffffff7bbbbbbb
 }
 
@@ -1608,6 +1901,14 @@ define i64 @imm_neg_8798043653189() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1093
 ; RV64IZBS-NEXT:    bclri a0, a0, 43
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8798043653189:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917475
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
 }
 
@@ -1653,6 +1954,15 @@ define i64 @imm_9223372034904144827() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1093
 ; RV64IZBS-NEXT:    bclri a0, a0, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048343
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1911
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
 }
 
@@ -1705,6 +2015,17 @@ define i64 @imm_neg_9223354442718100411() {
 ; RV64IZBS-NEXT:    bclri a0, a0, 44
 ; RV64IZBS-NEXT:    bclri a0, a0, 63
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223354442718100411:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524287
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 6
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -29
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
 }
 
@@ -1743,6 +2064,13 @@ define i64 @imm_2863311530() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 1365
 ; RV64IZBS-NEXT:    slli a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 349525
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
 	ret i64 2863311530 ; #0xaaaaaaaa
 }
 
@@ -1781,6 +2109,13 @@ define i64 @imm_neg_2863311530() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1365
 ; RV64IZBS-NEXT:    slli a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 699051
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
 	ret i64 -2863311530 ; #0xffffffff55555556
 }
 
@@ -1818,6 +2153,13 @@ define i64 @imm_2147486378() {
 ; RV64IZBS-NEXT:    li a0, 1365
 ; RV64IZBS-NEXT:    bseti a0, a0, 31
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2147486378:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 2147485013
 }
 
@@ -1852,6 +2194,12 @@ define i64 @imm_neg_2147485013() {
 ; RV64IZBS-NEXT:    lui a0, 524288
 ; RV64IZBS-NEXT:    addi a0, a0, -1365
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2147485013:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2147485013
 }
 
@@ -1894,6 +2242,14 @@ define i64 @imm_12900924131259() {
 ; RV64IZBS-NEXT:    slli a0, a0, 24
 ; RV64IZBS-NEXT:    addi a0, a0, 1979
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900924131259:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1979
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 12900924131259
 }
 
@@ -1930,6 +2286,13 @@ define i64 @imm_50394234880() {
 ; RV64IZBS-NEXT:    addiw a0, a0, -1093
 ; RV64IZBS-NEXT:    slli a0, a0, 16
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_50394234880:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 50394234880
 }
 
@@ -1976,6 +2339,15 @@ define i64 @imm_12900936431479() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 1911
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900936431479:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 192239
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 12900936431479
 }
 
@@ -2022,6 +2394,15 @@ define i64 @imm_12900918536874() {
 ; RV64IZBS-NEXT:    addi a0, a0, 1365
 ; RV64IZBS-NEXT:    slli a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900918536874:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 384477
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 12900918536874
 }
 
@@ -2071,6 +2452,16 @@ define i64 @imm_12900925247761() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 273
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900925247761:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 273
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 12900925247761
 }
 
@@ -2112,6 +2503,14 @@ define i64 @imm_7158272001() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_7158272001:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 427
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1367
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 7158272001 ; 0x0000_0001_aaaa_9001
 }
 
@@ -2153,6 +2552,14 @@ define i64 @imm_12884889601() {
 ; RV64IZBS-NEXT:    slli a0, a0, 12
 ; RV64IZBS-NEXT:    addi a0, a0, 1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12884889601:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 768
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -3
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 12884889601 ; 0x0000_0002_ffff_d001
 }
 
@@ -2193,6 +2600,14 @@ define i64 @imm_neg_3435982847() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 1
 ; RV64IZBS-NEXT:    bclri a0, a0, 31
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_3435982847:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048371
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 817
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -3435982847 ; 0xffff_ffff_3333_1001
 }
 
@@ -2233,6 +2648,14 @@ define i64 @imm_neg_5726842879() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 1
 ; RV64IZBS-NEXT:    bclri a0, a0, 32
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5726842879:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048235
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1419
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
 }
 
@@ -2273,6 +2696,14 @@ define i64 @imm_neg_10307948543() {
 ; RV64IZBS-NEXT:    addiw a0, a0, 1
 ; RV64IZBS-NEXT:    bclri a0, a0, 33
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_10307948543:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047962
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1645
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -10307948543 ; 0xffff_fffd_9999_3001
 }
 
@@ -2310,6 +2741,12 @@ define i64 @li_rori_1() {
 ; RV64IZBS-NEXT:    slli a0, a0, 43
 ; RV64IZBS-NEXT:    addi a0, a0, -1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 21
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -149533581377537
 }
 
@@ -2347,6 +2784,12 @@ define i64 @li_rori_2() {
 ; RV64IZBS-NEXT:    slli a0, a0, 60
 ; RV64IZBS-NEXT:    addi a0, a0, -6
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -86
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 4
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -5764607523034234886
 }
 
@@ -2384,6 +2827,12 @@ define i64 @li_rori_3() {
 ; RV64IZBS-NEXT:    slli a0, a0, 27
 ; RV64IZBS-NEXT:    addi a0, a0, -1
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 37
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2281701377
 }
 
@@ -2420,6 +2869,13 @@ define i64 @PR54812() {
 ; RV64IZBS-NEXT:    lui a0, 1045887
 ; RV64IZBS-NEXT:    bclri a0, a0, 31
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: PR54812:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048447
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1407
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2158497792;
 }
 
@@ -2452,6 +2908,12 @@ define signext i32 @pos_2048() nounwind {
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    bseti a0, zero, 11
 ; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_2048:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 11
+; RV64IXTHEADBB-NEXT:    ret
   ret i32 2048
 }
 

diff  --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index f735d21775114..132e73e080afb 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -7,6 +7,10 @@
 ; RUN:   | FileCheck %s -check-prefix=RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
 
 ; NOTE: -enable-legalize-types-checking is on one command line due to a previous
 ; assertion failure on an expensive checks build for @rotr_32_mask_multiple.
@@ -40,6 +44,22 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rolw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sllw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 32, %y
   %b = shl i32 %x, %y
   %c = lshr i32 %x, %z
@@ -73,6 +93,22 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rorw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    sll a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srlw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 32, %y
   %b = lshr i32 %x, %y
   %c = shl i32 %x, %z
@@ -177,6 +213,56 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rol a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a4, a0, a2
+; RV32XTHEADBB-NEXT:    addi a3, a2, -32
+; RV32XTHEADBB-NEXT:    slti a5, a3, 0
+; RV32XTHEADBB-NEXT:    neg a5, a5
+; RV32XTHEADBB-NEXT:    bltz a3, .LBB2_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a4
+; RV32XTHEADBB-NEXT:    j .LBB2_3
+; RV32XTHEADBB-NEXT:  .LBB2_2:
+; RV32XTHEADBB-NEXT:    sll a3, a1, a2
+; RV32XTHEADBB-NEXT:    not a6, a2
+; RV32XTHEADBB-NEXT:    srli a7, a0, 1
+; RV32XTHEADBB-NEXT:    srl a6, a7, a6
+; RV32XTHEADBB-NEXT:    or a3, a3, a6
+; RV32XTHEADBB-NEXT:  .LBB2_3:
+; RV32XTHEADBB-NEXT:    and a4, a5, a4
+; RV32XTHEADBB-NEXT:    neg a7, a2
+; RV32XTHEADBB-NEXT:    li a5, 32
+; RV32XTHEADBB-NEXT:    sub a6, a5, a2
+; RV32XTHEADBB-NEXT:    srl a5, a1, a7
+; RV32XTHEADBB-NEXT:    bltz a6, .LBB2_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    mv a0, a5
+; RV32XTHEADBB-NEXT:    j .LBB2_6
+; RV32XTHEADBB-NEXT:  .LBB2_5:
+; RV32XTHEADBB-NEXT:    srl a0, a0, a7
+; RV32XTHEADBB-NEXT:    li a7, 64
+; RV32XTHEADBB-NEXT:    sub a2, a7, a2
+; RV32XTHEADBB-NEXT:    not a2, a2
+; RV32XTHEADBB-NEXT:    slli a1, a1, 1
+; RV32XTHEADBB-NEXT:    sll a1, a1, a2
+; RV32XTHEADBB-NEXT:    or a0, a0, a1
+; RV32XTHEADBB-NEXT:  .LBB2_6:
+; RV32XTHEADBB-NEXT:    slti a1, a6, 0
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    and a1, a1, a5
+; RV32XTHEADBB-NEXT:    or a1, a3, a1
+; RV32XTHEADBB-NEXT:    or a0, a4, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sll a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srl a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i64 64, %y
   %b = shl i64 %x, %y
   %c = lshr i64 %x, %z
@@ -281,6 +367,56 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ror a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a4, a1, a2
+; RV32XTHEADBB-NEXT:    addi a3, a2, -32
+; RV32XTHEADBB-NEXT:    slti a5, a3, 0
+; RV32XTHEADBB-NEXT:    neg a5, a5
+; RV32XTHEADBB-NEXT:    bltz a3, .LBB3_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a4
+; RV32XTHEADBB-NEXT:    j .LBB3_3
+; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    srl a3, a0, a2
+; RV32XTHEADBB-NEXT:    not a6, a2
+; RV32XTHEADBB-NEXT:    slli a7, a1, 1
+; RV32XTHEADBB-NEXT:    sll a6, a7, a6
+; RV32XTHEADBB-NEXT:    or a3, a3, a6
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    and a4, a5, a4
+; RV32XTHEADBB-NEXT:    neg a7, a2
+; RV32XTHEADBB-NEXT:    li a5, 32
+; RV32XTHEADBB-NEXT:    sub a6, a5, a2
+; RV32XTHEADBB-NEXT:    sll a5, a0, a7
+; RV32XTHEADBB-NEXT:    bltz a6, .LBB3_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    mv a1, a5
+; RV32XTHEADBB-NEXT:    j .LBB3_6
+; RV32XTHEADBB-NEXT:  .LBB3_5:
+; RV32XTHEADBB-NEXT:    sll a1, a1, a7
+; RV32XTHEADBB-NEXT:    li a7, 64
+; RV32XTHEADBB-NEXT:    sub a2, a7, a2
+; RV32XTHEADBB-NEXT:    not a2, a2
+; RV32XTHEADBB-NEXT:    srli a0, a0, 1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a2
+; RV32XTHEADBB-NEXT:    or a1, a1, a0
+; RV32XTHEADBB-NEXT:  .LBB3_6:
+; RV32XTHEADBB-NEXT:    slti a0, a6, 0
+; RV32XTHEADBB-NEXT:    neg a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a5
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    or a1, a4, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srl a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sll a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i64 64, %y
   %b = lshr i64 %x, %y
   %c = shl i64 %x, %z
@@ -314,6 +450,22 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rolw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sllw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 0, %y
   %and = and i32 %z, 31
   %b = shl i32 %x, %y
@@ -348,6 +500,22 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rolw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sllw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = and i32 %y, 63
   %b = shl i32 %x, %a
   %c = sub i32 0, %y
@@ -385,6 +553,22 @@ define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rolw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sllw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = or i32 %y, 64
   %b = shl i32 %x, %a
   %c = sub i32 0, %y
@@ -420,6 +604,22 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rorw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    sll a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srlw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 0, %y
   %and = and i32 %z, 31
   %b = lshr i32 %x, %y
@@ -454,6 +654,22 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rorw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    sll a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srlw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = and i32 %y, 63
   %b = lshr i32 %x, %a
   %c = sub i32 0, %y
@@ -491,6 +707,22 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rorw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a2, a0, a1
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    sll a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srlw a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = or i32 %y, 64
   %b = lshr i32 %x, %a
   %c = sub i32 0, %y
@@ -593,6 +825,54 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rol a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    addi a5, a2, -32
+; RV32XTHEADBB-NEXT:    sll a4, a0, a2
+; RV32XTHEADBB-NEXT:    bltz a5, .LBB10_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a4
+; RV32XTHEADBB-NEXT:    j .LBB10_3
+; RV32XTHEADBB-NEXT:  .LBB10_2:
+; RV32XTHEADBB-NEXT:    sll a3, a1, a2
+; RV32XTHEADBB-NEXT:    not a6, a2
+; RV32XTHEADBB-NEXT:    srli a7, a0, 1
+; RV32XTHEADBB-NEXT:    srl a6, a7, a6
+; RV32XTHEADBB-NEXT:    or a3, a3, a6
+; RV32XTHEADBB-NEXT:  .LBB10_3:
+; RV32XTHEADBB-NEXT:    slti a5, a5, 0
+; RV32XTHEADBB-NEXT:    neg a5, a5
+; RV32XTHEADBB-NEXT:    and a4, a5, a4
+; RV32XTHEADBB-NEXT:    neg a6, a2
+; RV32XTHEADBB-NEXT:    srl a2, a1, a6
+; RV32XTHEADBB-NEXT:    andi a5, a6, 63
+; RV32XTHEADBB-NEXT:    addi a7, a5, -32
+; RV32XTHEADBB-NEXT:    slti t0, a7, 0
+; RV32XTHEADBB-NEXT:    neg t0, t0
+; RV32XTHEADBB-NEXT:    and a2, t0, a2
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB10_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    srl a0, a1, a5
+; RV32XTHEADBB-NEXT:    j .LBB10_6
+; RV32XTHEADBB-NEXT:  .LBB10_5:
+; RV32XTHEADBB-NEXT:    srl a0, a0, a6
+; RV32XTHEADBB-NEXT:    not a5, a5
+; RV32XTHEADBB-NEXT:    slli a1, a1, 1
+; RV32XTHEADBB-NEXT:    sll a1, a1, a5
+; RV32XTHEADBB-NEXT:    or a0, a0, a1
+; RV32XTHEADBB-NEXT:  .LBB10_6:
+; RV32XTHEADBB-NEXT:    or a0, a4, a0
+; RV32XTHEADBB-NEXT:    or a1, a3, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sll a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srl a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i64 0, %y
   %and = and i64 %z, 63
   %b = shl i64 %x, %y
@@ -696,6 +976,55 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rol a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a3, a2, 127
+; RV32XTHEADBB-NEXT:    addi a4, a3, -32
+; RV32XTHEADBB-NEXT:    bltz a4, .LBB11_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    sll a3, a0, a3
+; RV32XTHEADBB-NEXT:    j .LBB11_3
+; RV32XTHEADBB-NEXT:  .LBB11_2:
+; RV32XTHEADBB-NEXT:    sll a5, a1, a2
+; RV32XTHEADBB-NEXT:    srli a6, a0, 1
+; RV32XTHEADBB-NEXT:    not a3, a3
+; RV32XTHEADBB-NEXT:    srl a3, a6, a3
+; RV32XTHEADBB-NEXT:    or a3, a5, a3
+; RV32XTHEADBB-NEXT:  .LBB11_3:
+; RV32XTHEADBB-NEXT:    sll a5, a0, a2
+; RV32XTHEADBB-NEXT:    slti a4, a4, 0
+; RV32XTHEADBB-NEXT:    neg a4, a4
+; RV32XTHEADBB-NEXT:    and a4, a4, a5
+; RV32XTHEADBB-NEXT:    neg a6, a2
+; RV32XTHEADBB-NEXT:    srl a2, a1, a6
+; RV32XTHEADBB-NEXT:    andi a5, a6, 63
+; RV32XTHEADBB-NEXT:    addi a7, a5, -32
+; RV32XTHEADBB-NEXT:    slti t0, a7, 0
+; RV32XTHEADBB-NEXT:    neg t0, t0
+; RV32XTHEADBB-NEXT:    and a2, t0, a2
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB11_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    srl a0, a1, a5
+; RV32XTHEADBB-NEXT:    j .LBB11_6
+; RV32XTHEADBB-NEXT:  .LBB11_5:
+; RV32XTHEADBB-NEXT:    srl a0, a0, a6
+; RV32XTHEADBB-NEXT:    not a5, a5
+; RV32XTHEADBB-NEXT:    slli a1, a1, 1
+; RV32XTHEADBB-NEXT:    sll a1, a1, a5
+; RV32XTHEADBB-NEXT:    or a0, a0, a1
+; RV32XTHEADBB-NEXT:  .LBB11_6:
+; RV32XTHEADBB-NEXT:    or a0, a4, a0
+; RV32XTHEADBB-NEXT:    or a1, a3, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sll a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srl a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = and i64 %y, 127
   %b = shl i64 %x, %a
   %c = sub i64 0, %y
@@ -736,6 +1065,22 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rol a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    sll a3, a0, a2
+; RV32XTHEADBB-NEXT:    neg a0, a2
+; RV32XTHEADBB-NEXT:    srl a0, a1, a0
+; RV32XTHEADBB-NEXT:    mv a1, a3
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sll a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srl a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = or i64 %y, 128
   %b = shl i64 %x, %a
   %c = sub i64 0, %y
@@ -838,6 +1183,54 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ror a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a4, a1, a2
+; RV32XTHEADBB-NEXT:    addi a3, a2, -32
+; RV32XTHEADBB-NEXT:    slti a5, a3, 0
+; RV32XTHEADBB-NEXT:    neg a5, a5
+; RV32XTHEADBB-NEXT:    bltz a3, .LBB13_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a4
+; RV32XTHEADBB-NEXT:    j .LBB13_3
+; RV32XTHEADBB-NEXT:  .LBB13_2:
+; RV32XTHEADBB-NEXT:    srl a3, a0, a2
+; RV32XTHEADBB-NEXT:    not a6, a2
+; RV32XTHEADBB-NEXT:    slli a7, a1, 1
+; RV32XTHEADBB-NEXT:    sll a6, a7, a6
+; RV32XTHEADBB-NEXT:    or a3, a3, a6
+; RV32XTHEADBB-NEXT:  .LBB13_3:
+; RV32XTHEADBB-NEXT:    neg a6, a2
+; RV32XTHEADBB-NEXT:    andi t0, a6, 63
+; RV32XTHEADBB-NEXT:    addi a7, t0, -32
+; RV32XTHEADBB-NEXT:    and a2, a5, a4
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB13_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    sll a1, a0, t0
+; RV32XTHEADBB-NEXT:    j .LBB13_6
+; RV32XTHEADBB-NEXT:  .LBB13_5:
+; RV32XTHEADBB-NEXT:    sll a1, a1, a6
+; RV32XTHEADBB-NEXT:    not a4, t0
+; RV32XTHEADBB-NEXT:    srli a5, a0, 1
+; RV32XTHEADBB-NEXT:    srl a4, a5, a4
+; RV32XTHEADBB-NEXT:    or a1, a1, a4
+; RV32XTHEADBB-NEXT:  .LBB13_6:
+; RV32XTHEADBB-NEXT:    sll a0, a0, a6
+; RV32XTHEADBB-NEXT:    slti a4, a7, 0
+; RV32XTHEADBB-NEXT:    neg a4, a4
+; RV32XTHEADBB-NEXT:    and a0, a4, a0
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    or a1, a2, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srl a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sll a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i64 0, %y
   %and = and i64 %z, 63
   %b = lshr i64 %x, %y
@@ -941,6 +1334,55 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ror a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a4, a1, a2
+; RV32XTHEADBB-NEXT:    andi a3, a2, 127
+; RV32XTHEADBB-NEXT:    addi a6, a3, -32
+; RV32XTHEADBB-NEXT:    slti a5, a6, 0
+; RV32XTHEADBB-NEXT:    neg a5, a5
+; RV32XTHEADBB-NEXT:    bltz a6, .LBB14_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    srl a3, a1, a3
+; RV32XTHEADBB-NEXT:    j .LBB14_3
+; RV32XTHEADBB-NEXT:  .LBB14_2:
+; RV32XTHEADBB-NEXT:    srl a6, a0, a2
+; RV32XTHEADBB-NEXT:    slli a7, a1, 1
+; RV32XTHEADBB-NEXT:    not a3, a3
+; RV32XTHEADBB-NEXT:    sll a3, a7, a3
+; RV32XTHEADBB-NEXT:    or a3, a6, a3
+; RV32XTHEADBB-NEXT:  .LBB14_3:
+; RV32XTHEADBB-NEXT:    neg a6, a2
+; RV32XTHEADBB-NEXT:    andi t0, a6, 63
+; RV32XTHEADBB-NEXT:    addi a7, t0, -32
+; RV32XTHEADBB-NEXT:    and a2, a5, a4
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB14_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    sll a1, a0, t0
+; RV32XTHEADBB-NEXT:    j .LBB14_6
+; RV32XTHEADBB-NEXT:  .LBB14_5:
+; RV32XTHEADBB-NEXT:    sll a1, a1, a6
+; RV32XTHEADBB-NEXT:    not a4, t0
+; RV32XTHEADBB-NEXT:    srli a5, a0, 1
+; RV32XTHEADBB-NEXT:    srl a4, a5, a4
+; RV32XTHEADBB-NEXT:    or a1, a1, a4
+; RV32XTHEADBB-NEXT:  .LBB14_6:
+; RV32XTHEADBB-NEXT:    sll a0, a0, a6
+; RV32XTHEADBB-NEXT:    slti a4, a7, 0
+; RV32XTHEADBB-NEXT:    neg a4, a4
+; RV32XTHEADBB-NEXT:    and a0, a4, a0
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    or a1, a2, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srl a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sll a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = and i64 %y, 127
   %b = lshr i64 %x, %a
   %c = sub i64 0, %y
@@ -981,6 +1423,22 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ror a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    srl a3, a1, a2
+; RV32XTHEADBB-NEXT:    neg a1, a2
+; RV32XTHEADBB-NEXT:    sll a1, a0, a1
+; RV32XTHEADBB-NEXT:    mv a0, a3
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srl a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sll a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %a = or i64 %y, 128
   %b = lshr i64 %x, %a
   %c = sub i64 0, %y
@@ -1026,6 +1484,27 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
 ; RV64ZBB-NEXT:    sllw a1, a1, a2
 ; RV64ZBB-NEXT:    addw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a3, a2, 31
+; RV32XTHEADBB-NEXT:    sll a4, a0, a3
+; RV32XTHEADBB-NEXT:    neg a3, a3
+; RV32XTHEADBB-NEXT:    srl a0, a0, a3
+; RV32XTHEADBB-NEXT:    or a0, a4, a0
+; RV32XTHEADBB-NEXT:    sll a1, a1, a2
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sllw a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    sllw a1, a1, a2
+; RV64XTHEADBB-NEXT:    addw a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i32 %amt, 31
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
   %2 = shl i32 %b, %maskedamt
@@ -1141,6 +1620,62 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
 ; RV64ZBB-NEXT:    sll a1, a1, a2
 ; RV64ZBB-NEXT:    add a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    slli a5, a4, 26
+; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    mv a7, a0
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB17_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a7, a1
+; RV32XTHEADBB-NEXT:  .LBB17_2:
+; RV32XTHEADBB-NEXT:    andi a6, a4, 63
+; RV32XTHEADBB-NEXT:    sll t0, a7, a4
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB17_4
+; RV32XTHEADBB-NEXT:  # %bb.3:
+; RV32XTHEADBB-NEXT:    mv a1, a0
+; RV32XTHEADBB-NEXT:  .LBB17_4:
+; RV32XTHEADBB-NEXT:    srli a0, a1, 1
+; RV32XTHEADBB-NEXT:    not t1, a4
+; RV32XTHEADBB-NEXT:    srl a0, a0, t1
+; RV32XTHEADBB-NEXT:    or a5, t0, a0
+; RV32XTHEADBB-NEXT:    sll a1, a1, a4
+; RV32XTHEADBB-NEXT:    srli a0, a7, 1
+; RV32XTHEADBB-NEXT:    srl a7, a0, t1
+; RV32XTHEADBB-NEXT:    addi a0, a6, -32
+; RV32XTHEADBB-NEXT:    or a1, a1, a7
+; RV32XTHEADBB-NEXT:    bltz a0, .LBB17_6
+; RV32XTHEADBB-NEXT:  # %bb.5:
+; RV32XTHEADBB-NEXT:    sll a3, a2, a6
+; RV32XTHEADBB-NEXT:    j .LBB17_7
+; RV32XTHEADBB-NEXT:  .LBB17_6:
+; RV32XTHEADBB-NEXT:    sll a3, a3, a4
+; RV32XTHEADBB-NEXT:    srli a7, a2, 1
+; RV32XTHEADBB-NEXT:    not a6, a6
+; RV32XTHEADBB-NEXT:    srl a6, a7, a6
+; RV32XTHEADBB-NEXT:    or a3, a3, a6
+; RV32XTHEADBB-NEXT:  .LBB17_7:
+; RV32XTHEADBB-NEXT:    sll a2, a2, a4
+; RV32XTHEADBB-NEXT:    slti a0, a0, 0
+; RV32XTHEADBB-NEXT:    neg a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a2
+; RV32XTHEADBB-NEXT:    add a0, a1, a0
+; RV32XTHEADBB-NEXT:    sltu a1, a0, a1
+; RV32XTHEADBB-NEXT:    add a3, a5, a3
+; RV32XTHEADBB-NEXT:    add a1, a3, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a3, a2, 63
+; RV64XTHEADBB-NEXT:    sll a4, a0, a3
+; RV64XTHEADBB-NEXT:    neg a3, a3
+; RV64XTHEADBB-NEXT:    srl a0, a0, a3
+; RV64XTHEADBB-NEXT:    or a0, a4, a0
+; RV64XTHEADBB-NEXT:    sll a1, a1, a2
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i64 %amt, 63
   %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
   %2 = shl i64 %b, %maskedamt
@@ -1183,6 +1718,27 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
 ; RV64ZBB-NEXT:    sllw a1, a1, a2
 ; RV64ZBB-NEXT:    addw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a3, a2, 31
+; RV32XTHEADBB-NEXT:    srl a4, a0, a3
+; RV32XTHEADBB-NEXT:    neg a3, a3
+; RV32XTHEADBB-NEXT:    sll a0, a0, a3
+; RV32XTHEADBB-NEXT:    or a0, a4, a0
+; RV32XTHEADBB-NEXT:    sll a1, a1, a2
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srlw a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    sllw a1, a1, a2
+; RV64XTHEADBB-NEXT:    addw a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i32 %amt, 31
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
   %2 = shl i32 %b, %maskedamt
@@ -1296,6 +1852,61 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
 ; RV64ZBB-NEXT:    sll a1, a1, a2
 ; RV64ZBB-NEXT:    add a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a7, a4, 32
+; RV32XTHEADBB-NEXT:    mv a6, a1
+; RV32XTHEADBB-NEXT:    beqz a7, .LBB19_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a6, a0
+; RV32XTHEADBB-NEXT:  .LBB19_2:
+; RV32XTHEADBB-NEXT:    andi a5, a4, 63
+; RV32XTHEADBB-NEXT:    srl t0, a6, a4
+; RV32XTHEADBB-NEXT:    beqz a7, .LBB19_4
+; RV32XTHEADBB-NEXT:  # %bb.3:
+; RV32XTHEADBB-NEXT:    mv a0, a1
+; RV32XTHEADBB-NEXT:  .LBB19_4:
+; RV32XTHEADBB-NEXT:    slli a1, a0, 1
+; RV32XTHEADBB-NEXT:    not a7, a4
+; RV32XTHEADBB-NEXT:    sll a1, a1, a7
+; RV32XTHEADBB-NEXT:    or a1, a1, t0
+; RV32XTHEADBB-NEXT:    srl t0, a0, a4
+; RV32XTHEADBB-NEXT:    slli a6, a6, 1
+; RV32XTHEADBB-NEXT:    sll a6, a6, a7
+; RV32XTHEADBB-NEXT:    addi a0, a5, -32
+; RV32XTHEADBB-NEXT:    or a6, a6, t0
+; RV32XTHEADBB-NEXT:    bltz a0, .LBB19_6
+; RV32XTHEADBB-NEXT:  # %bb.5:
+; RV32XTHEADBB-NEXT:    sll a3, a2, a5
+; RV32XTHEADBB-NEXT:    j .LBB19_7
+; RV32XTHEADBB-NEXT:  .LBB19_6:
+; RV32XTHEADBB-NEXT:    sll a3, a3, a4
+; RV32XTHEADBB-NEXT:    srli a7, a2, 1
+; RV32XTHEADBB-NEXT:    not a5, a5
+; RV32XTHEADBB-NEXT:    srl a5, a7, a5
+; RV32XTHEADBB-NEXT:    or a3, a3, a5
+; RV32XTHEADBB-NEXT:  .LBB19_7:
+; RV32XTHEADBB-NEXT:    sll a2, a2, a4
+; RV32XTHEADBB-NEXT:    slti a0, a0, 0
+; RV32XTHEADBB-NEXT:    neg a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a2
+; RV32XTHEADBB-NEXT:    add a0, a6, a0
+; RV32XTHEADBB-NEXT:    sltu a2, a0, a6
+; RV32XTHEADBB-NEXT:    add a1, a1, a3
+; RV32XTHEADBB-NEXT:    add a1, a1, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a3, a2, 63
+; RV64XTHEADBB-NEXT:    srl a4, a0, a3
+; RV64XTHEADBB-NEXT:    neg a3, a3
+; RV64XTHEADBB-NEXT:    sll a0, a0, a3
+; RV64XTHEADBB-NEXT:    or a0, a4, a0
+; RV64XTHEADBB-NEXT:    sll a1, a1, a2
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i64 %amt, 63
   %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
   %2 = shl i64 %b, %maskedamt
@@ -1342,6 +1953,32 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
 ; RV64ZBB-NEXT:    rolw a1, a1, a2
 ; RV64ZBB-NEXT:    addw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a2, a2, 31
+; RV32XTHEADBB-NEXT:    sll a3, a0, a2
+; RV32XTHEADBB-NEXT:    neg a4, a2
+; RV32XTHEADBB-NEXT:    srl a0, a0, a4
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    sll a2, a1, a2
+; RV32XTHEADBB-NEXT:    srl a1, a1, a4
+; RV32XTHEADBB-NEXT:    or a1, a2, a1
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a2, a2, 31
+; RV64XTHEADBB-NEXT:    sllw a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    srlw a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    sllw a2, a1, a2
+; RV64XTHEADBB-NEXT:    srlw a1, a1, a4
+; RV64XTHEADBB-NEXT:    or a1, a2, a1
+; RV64XTHEADBB-NEXT:    addw a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i32 %amt, 31
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
   %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1458,6 +2095,64 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
 ; RV64ZBB-NEXT:    rol a1, a1, a2
 ; RV64ZBB-NEXT:    add a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    slli a5, a4, 26
+; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    mv a6, a1
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a6, a0
+; RV32XTHEADBB-NEXT:  .LBB21_2:
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_4
+; RV32XTHEADBB-NEXT:  # %bb.3:
+; RV32XTHEADBB-NEXT:    mv a0, a1
+; RV32XTHEADBB-NEXT:  .LBB21_4:
+; RV32XTHEADBB-NEXT:    sll a7, a6, a4
+; RV32XTHEADBB-NEXT:    srli t0, a0, 1
+; RV32XTHEADBB-NEXT:    not a1, a4
+; RV32XTHEADBB-NEXT:    srl t0, t0, a1
+; RV32XTHEADBB-NEXT:    sll t1, a0, a4
+; RV32XTHEADBB-NEXT:    srli a0, a6, 1
+; RV32XTHEADBB-NEXT:    srl t2, a0, a1
+; RV32XTHEADBB-NEXT:    mv a0, a3
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_6
+; RV32XTHEADBB-NEXT:  # %bb.5:
+; RV32XTHEADBB-NEXT:    mv a0, a2
+; RV32XTHEADBB-NEXT:  .LBB21_6:
+; RV32XTHEADBB-NEXT:    or a6, a7, t0
+; RV32XTHEADBB-NEXT:    or a7, t1, t2
+; RV32XTHEADBB-NEXT:    sll t0, a0, a4
+; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_8
+; RV32XTHEADBB-NEXT:  # %bb.7:
+; RV32XTHEADBB-NEXT:    mv a2, a3
+; RV32XTHEADBB-NEXT:  .LBB21_8:
+; RV32XTHEADBB-NEXT:    srli a3, a2, 1
+; RV32XTHEADBB-NEXT:    srl a3, a3, a1
+; RV32XTHEADBB-NEXT:    or a3, t0, a3
+; RV32XTHEADBB-NEXT:    sll a2, a2, a4
+; RV32XTHEADBB-NEXT:    srli a0, a0, 1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a1
+; RV32XTHEADBB-NEXT:    or a0, a2, a0
+; RV32XTHEADBB-NEXT:    add a1, a7, a0
+; RV32XTHEADBB-NEXT:    add a0, a6, a3
+; RV32XTHEADBB-NEXT:    sltu a2, a0, a6
+; RV32XTHEADBB-NEXT:    add a1, a1, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a2, a2, 63
+; RV64XTHEADBB-NEXT:    sll a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    srl a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    sll a2, a1, a2
+; RV64XTHEADBB-NEXT:    srl a1, a1, a4
+; RV64XTHEADBB-NEXT:    or a1, a2, a1
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i64 %amt, 63
   %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
   %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1503,6 +2198,32 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
 ; RV64ZBB-NEXT:    rorw a1, a1, a2
 ; RV64ZBB-NEXT:    addw a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a2, a2, 31
+; RV32XTHEADBB-NEXT:    srl a3, a0, a2
+; RV32XTHEADBB-NEXT:    neg a4, a2
+; RV32XTHEADBB-NEXT:    sll a0, a0, a4
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    srl a2, a1, a2
+; RV32XTHEADBB-NEXT:    sll a1, a1, a4
+; RV32XTHEADBB-NEXT:    or a1, a2, a1
+; RV32XTHEADBB-NEXT:    add a0, a0, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a2, a2, 31
+; RV64XTHEADBB-NEXT:    srlw a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    sllw a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    srlw a2, a1, a2
+; RV64XTHEADBB-NEXT:    sllw a1, a1, a4
+; RV64XTHEADBB-NEXT:    or a1, a2, a1
+; RV64XTHEADBB-NEXT:    addw a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i32 %amt, 31
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
   %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1617,6 +2338,63 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
 ; RV64ZBB-NEXT:    ror a1, a1, a2
 ; RV64ZBB-NEXT:    add a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    andi a5, a4, 32
+; RV32XTHEADBB-NEXT:    mv a6, a0
+; RV32XTHEADBB-NEXT:    beqz a5, .LBB23_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a6, a1
+; RV32XTHEADBB-NEXT:  .LBB23_2:
+; RV32XTHEADBB-NEXT:    beqz a5, .LBB23_4
+; RV32XTHEADBB-NEXT:  # %bb.3:
+; RV32XTHEADBB-NEXT:    mv a1, a0
+; RV32XTHEADBB-NEXT:  .LBB23_4:
+; RV32XTHEADBB-NEXT:    srl a7, a6, a4
+; RV32XTHEADBB-NEXT:    slli t0, a1, 1
+; RV32XTHEADBB-NEXT:    not a0, a4
+; RV32XTHEADBB-NEXT:    sll t0, t0, a0
+; RV32XTHEADBB-NEXT:    srl t1, a1, a4
+; RV32XTHEADBB-NEXT:    slli a6, a6, 1
+; RV32XTHEADBB-NEXT:    sll t2, a6, a0
+; RV32XTHEADBB-NEXT:    mv a6, a2
+; RV32XTHEADBB-NEXT:    beqz a5, .LBB23_6
+; RV32XTHEADBB-NEXT:  # %bb.5:
+; RV32XTHEADBB-NEXT:    mv a6, a3
+; RV32XTHEADBB-NEXT:  .LBB23_6:
+; RV32XTHEADBB-NEXT:    or a1, t0, a7
+; RV32XTHEADBB-NEXT:    or a7, t2, t1
+; RV32XTHEADBB-NEXT:    srl t0, a6, a4
+; RV32XTHEADBB-NEXT:    beqz a5, .LBB23_8
+; RV32XTHEADBB-NEXT:  # %bb.7:
+; RV32XTHEADBB-NEXT:    mv a3, a2
+; RV32XTHEADBB-NEXT:  .LBB23_8:
+; RV32XTHEADBB-NEXT:    slli a2, a3, 1
+; RV32XTHEADBB-NEXT:    sll a2, a2, a0
+; RV32XTHEADBB-NEXT:    or a2, a2, t0
+; RV32XTHEADBB-NEXT:    srl a3, a3, a4
+; RV32XTHEADBB-NEXT:    slli a6, a6, 1
+; RV32XTHEADBB-NEXT:    sll a0, a6, a0
+; RV32XTHEADBB-NEXT:    or a0, a0, a3
+; RV32XTHEADBB-NEXT:    add a7, a7, a0
+; RV32XTHEADBB-NEXT:    add a0, a1, a2
+; RV32XTHEADBB-NEXT:    sltu a1, a0, a1
+; RV32XTHEADBB-NEXT:    add a1, a7, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    andi a2, a2, 63
+; RV64XTHEADBB-NEXT:    srl a3, a0, a2
+; RV64XTHEADBB-NEXT:    neg a4, a2
+; RV64XTHEADBB-NEXT:    sll a0, a0, a4
+; RV64XTHEADBB-NEXT:    or a0, a3, a0
+; RV64XTHEADBB-NEXT:    srl a2, a1, a2
+; RV64XTHEADBB-NEXT:    sll a1, a1, a4
+; RV64XTHEADBB-NEXT:    or a1, a2, a1
+; RV64XTHEADBB-NEXT:    add a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
   %maskedamt = and i64 %amt, 63
   %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
   %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1721,6 +2499,56 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    rol a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_zext:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    neg a4, a2
+; RV32XTHEADBB-NEXT:    sll a5, a0, a2
+; RV32XTHEADBB-NEXT:    addi a3, a2, -32
+; RV32XTHEADBB-NEXT:    slti a6, a3, 0
+; RV32XTHEADBB-NEXT:    neg a6, a6
+; RV32XTHEADBB-NEXT:    bltz a3, .LBB24_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a5
+; RV32XTHEADBB-NEXT:    j .LBB24_3
+; RV32XTHEADBB-NEXT:  .LBB24_2:
+; RV32XTHEADBB-NEXT:    sll a3, a1, a2
+; RV32XTHEADBB-NEXT:    not a7, a2
+; RV32XTHEADBB-NEXT:    srli t0, a0, 1
+; RV32XTHEADBB-NEXT:    srl a7, t0, a7
+; RV32XTHEADBB-NEXT:    or a3, a3, a7
+; RV32XTHEADBB-NEXT:  .LBB24_3:
+; RV32XTHEADBB-NEXT:    and a5, a6, a5
+; RV32XTHEADBB-NEXT:    li a6, 32
+; RV32XTHEADBB-NEXT:    sub a7, a6, a2
+; RV32XTHEADBB-NEXT:    srl a6, a1, a4
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB24_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    mv a0, a6
+; RV32XTHEADBB-NEXT:    j .LBB24_6
+; RV32XTHEADBB-NEXT:  .LBB24_5:
+; RV32XTHEADBB-NEXT:    li t0, 64
+; RV32XTHEADBB-NEXT:    sub a2, t0, a2
+; RV32XTHEADBB-NEXT:    srl a0, a0, a4
+; RV32XTHEADBB-NEXT:    not a2, a2
+; RV32XTHEADBB-NEXT:    slli a1, a1, 1
+; RV32XTHEADBB-NEXT:    sll a1, a1, a2
+; RV32XTHEADBB-NEXT:    or a0, a0, a1
+; RV32XTHEADBB-NEXT:  .LBB24_6:
+; RV32XTHEADBB-NEXT:    slti a1, a7, 0
+; RV32XTHEADBB-NEXT:    neg a1, a1
+; RV32XTHEADBB-NEXT:    and a1, a1, a6
+; RV32XTHEADBB-NEXT:    or a1, a3, a1
+; RV32XTHEADBB-NEXT:    or a0, a5, a0
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_zext:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    sll a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    srl a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 64, %y
   %zext = zext i32 %z to i64
   %zexty = zext i32 %y to i64
@@ -1827,6 +2655,56 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    ror a0, a0, a1
 ; RV64ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_zext:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    neg a4, a2
+; RV32XTHEADBB-NEXT:    srl a5, a1, a2
+; RV32XTHEADBB-NEXT:    addi a3, a2, -32
+; RV32XTHEADBB-NEXT:    slti a6, a3, 0
+; RV32XTHEADBB-NEXT:    neg a6, a6
+; RV32XTHEADBB-NEXT:    bltz a3, .LBB25_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    mv a3, a5
+; RV32XTHEADBB-NEXT:    j .LBB25_3
+; RV32XTHEADBB-NEXT:  .LBB25_2:
+; RV32XTHEADBB-NEXT:    srl a3, a0, a2
+; RV32XTHEADBB-NEXT:    not a7, a2
+; RV32XTHEADBB-NEXT:    slli t0, a1, 1
+; RV32XTHEADBB-NEXT:    sll a7, t0, a7
+; RV32XTHEADBB-NEXT:    or a3, a3, a7
+; RV32XTHEADBB-NEXT:  .LBB25_3:
+; RV32XTHEADBB-NEXT:    and a5, a6, a5
+; RV32XTHEADBB-NEXT:    li a6, 32
+; RV32XTHEADBB-NEXT:    sub a7, a6, a2
+; RV32XTHEADBB-NEXT:    sll a6, a0, a4
+; RV32XTHEADBB-NEXT:    bltz a7, .LBB25_5
+; RV32XTHEADBB-NEXT:  # %bb.4:
+; RV32XTHEADBB-NEXT:    mv a1, a6
+; RV32XTHEADBB-NEXT:    j .LBB25_6
+; RV32XTHEADBB-NEXT:  .LBB25_5:
+; RV32XTHEADBB-NEXT:    li t0, 64
+; RV32XTHEADBB-NEXT:    sub a2, t0, a2
+; RV32XTHEADBB-NEXT:    sll a1, a1, a4
+; RV32XTHEADBB-NEXT:    not a2, a2
+; RV32XTHEADBB-NEXT:    srli a0, a0, 1
+; RV32XTHEADBB-NEXT:    srl a0, a0, a2
+; RV32XTHEADBB-NEXT:    or a1, a1, a0
+; RV32XTHEADBB-NEXT:  .LBB25_6:
+; RV32XTHEADBB-NEXT:    slti a0, a7, 0
+; RV32XTHEADBB-NEXT:    neg a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a6
+; RV32XTHEADBB-NEXT:    or a0, a3, a0
+; RV32XTHEADBB-NEXT:    or a1, a5, a1
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_zext:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srl a2, a0, a1
+; RV64XTHEADBB-NEXT:    neg a1, a1
+; RV64XTHEADBB-NEXT:    sll a0, a0, a1
+; RV64XTHEADBB-NEXT:    or a0, a2, a0
+; RV64XTHEADBB-NEXT:    ret
   %z = sub i32 64, %y
   %zext = zext i32 %z to i64
   %zexty = zext i32 %y to i64

diff  --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
new file mode 100644
index 0000000000000..2e3156d8e7c3b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -0,0 +1,453 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @ctlz_i32(i32 %a) nounwind {
+; RV32I-LABEL: ctlz_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beqz a0, .LBB0_2
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    and a2, a0, a1
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    add a0, a2, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    li a0, 32
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV32I-LABEL: ctlz_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    srli a0, a1, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi s4, a2, 1365
+; RV32I-NEXT:    and a1, a1, s4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi s5, a1, 819
+; RV32I-NEXT:    and a1, a0, s5
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, s5
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi s6, a1, -241
+; RV32I-NEXT:    and a0, a0, s6
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi s3, a1, 257
+; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    srli a0, s2, 1
+; RV32I-NEXT:    or a0, s2, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    and a1, a1, s4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    and a1, a0, s5
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, s5
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    and a0, a0, s6
+; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    bnez s0, .LBB1_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    addi a0, a0, 32
+; RV32I-NEXT:    j .LBB1_3
+; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:    srli a0, s1, 24
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a1, .LBB1_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    addi a0, a0, 32
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB1_2:
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a1
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @cttz_i32(i32 %a) nounwind {
+; RV32I-LABEL: cttz_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beqz a0, .LBB2_2
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    lui a1, 30667
+; RV32I-NEXT:    addi a1, a1, 1329
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI2_0)
+; RV32I-NEXT:    addi a1, a1, %lo(.LCPI2_0)
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    lbu a0, 0(a0)
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    li a0, 32
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: cttz_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    beqz a0, .LBB2_2
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB2_2:
+; RV32XTHEADBB-NEXT:    li a0, 32
+; RV32XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV32I-LABEL: cttz_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    lui a1, 30667
+; RV32I-NEXT:    addi s3, a1, 1329
+; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s2
+; RV32I-NEXT:    and a0, s2, a0
+; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    call __mulsi3 at plt
+; RV32I-NEXT:    bnez s2, .LBB3_3
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    li a0, 32
+; RV32I-NEXT:    beqz s0, .LBB3_4
+; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:    srli s1, s1, 27
+; RV32I-NEXT:    add s1, s4, s1
+; RV32I-NEXT:    lbu a0, 0(s1)
+; RV32I-NEXT:    j .LBB3_5
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    lbu a0, 0(a0)
+; RV32I-NEXT:    bnez s0, .LBB3_2
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    addi a0, a0, 32
+; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: cttz_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_2
+; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    addi a0, a1, -1
+; RV32XTHEADBB-NEXT:    not a1, a1
+; RV32XTHEADBB-NEXT:    and a0, a1, a0
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 64
+; RV32XTHEADBB-NEXT:    j .LBB3_3
+; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    addi a1, a0, -1
+; RV32XTHEADBB-NEXT:    not a0, a0
+; RV32XTHEADBB-NEXT:    and a0, a0, a1
+; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV32XTHEADBB-NEXT:    li a1, 32
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    sub a0, a1, a0
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+define i32 @sextb_i32(i32 %a) nounwind {
+; RV32I-LABEL: sextb_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srai a0, a0, 24
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: sextb_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV32I-LABEL: sextb_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a0, a1, 24
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: sextb_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT:    srai a1, a0, 31
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define i32 @sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: sexth_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: sexth_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: sexth_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a0, a1, 16
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: sexth_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    srai a1, a0, 31
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: zexth_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: zexth_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: zexth_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: zexth_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define i32 @bswap_i32(i32 %a) nounwind {
+; RV32I-LABEL: bswap_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    and a2, a0, a2
+; RV32I-NEXT:    slli a2, a2, 8
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: bswap_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.rev a0, a0
+; RV32XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV32I-LABEL: bswap_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    and a4, a1, a3
+; RV32I-NEXT:    slli a4, a4, 8
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    or a2, a1, a2
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    srli a4, a0, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    and a3, a0, a3
+; RV32I-NEXT:    slli a3, a3, 8
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: bswap_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    th.rev a2, a1
+; RV32XTHEADBB-NEXT:    th.rev a1, a0
+; RV32XTHEADBB-NEXT:    mv a0, a2
+; RV32XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
new file mode 100644
index 0000000000000..6b032d39d9f83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -0,0 +1,768 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    j .LBB1_3
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:  .LBB1_3: # %cond.end
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 31
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  %2 = sub i32 31, %1
+  ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    li s0, 32
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    beqz a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a1, a0, 24
+; RV64I-NEXT:  .LBB2_2: # %cond.end
+; RV64I-NEXT:    sub a0, s0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_ceil_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 32
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = sub i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %3 = sub i32 32, %2
+  ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    or a0, s0, a0
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    xori a0, a0, 31
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findLastSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    not a1, a0
+; RV64XTHEADBB-NEXT:    slli a1, a1, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a1, a1
+; RV64XTHEADBB-NEXT:    xori a1, a1, 31
+; RV64XTHEADBB-NEXT:    snez a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    or a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+  %2 = xor i32 31, %1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 -1, i32 %2
+  ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    lui a1, %hi(.LCPI5_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI5_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI5_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI5_1)(a2)
+; RV64I-NEXT:    srli a3, a0, 1
+; RV64I-NEXT:    and a1, a3, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    and a1, a0, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI5_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI5_2)(a2)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI5_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI5_3)(a1)
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB6_2:
+; RV64XTHEADBB-NEXT:    li a0, 32
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findFirstSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a2, a0
+; RV64XTHEADBB-NEXT:    and a1, a2, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
+; RV64XTHEADBB-NEXT:    li a2, 64
+; RV64XTHEADBB-NEXT:    sub a2, a2, a1
+; RV64XTHEADBB-NEXT:    snez a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    or a0, a0, a2
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = icmp eq i32 %a, 0
+  %3 = select i1 %2, i32 -1, i32 %1
+  ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    seqz a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ffs_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a2, a0
+; RV64XTHEADBB-NEXT:    and a1, a2, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
+; RV64XTHEADBB-NEXT:    li a2, 65
+; RV64XTHEADBB-NEXT:    sub a2, a2, a1
+; RV64XTHEADBB-NEXT:    seqz a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    and a0, a0, a2
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = add i32 %1, 1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 0, i32 %2
+  ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB10_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srli a0, a0, 58
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB10_2:
+; RV64XTHEADBB-NEXT:    li a0, 64
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 8
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addiw a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    and a3, a0, a3
+; RV64I-NEXT:    slli a3, a3, 8
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32_nosext:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %1, ptr %x
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    srli a5, a0, 8
+; RV64I-NEXT:    srliw a5, a5, 24
+; RV64I-NEXT:    slli a5, a5, 24
+; RV64I-NEXT:    or a3, a5, a3
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    and a4, a0, a4
+; RV64I-NEXT:    slli a4, a4, 24
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a3, a4, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 40
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.rev a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}


        


More information about the cfe-commits mailing list