[llvm] 19a5909 - [RISCV] Add vendor-defined XTheadBb (basic bit-manipulation) extension
Philipp Tomsich via llvm-commits
llvm-commits@lists.llvm.org
Tue Feb 7 22:57:43 PST 2023
Author: Philipp Tomsich
Date: 2023-02-08T07:57:27+01:00
New Revision: 19a59099095b3cbc9846e5330de26fca0a44ccbe
URL: https://github.com/llvm/llvm-project/commit/19a59099095b3cbc9846e5330de26fca0a44ccbe
DIFF: https://github.com/llvm/llvm-project/commit/19a59099095b3cbc9846e5330de26fca0a44ccbe.diff
LOG: [RISCV] Add vendor-defined XTheadBb (basic bit-manipulation) extension
The vendor-defined XTHeadBb extension (which predates the standard Zbb
extension) adds basic bit-manipulation instructions with semantics similar
to some of the Zbb instructions.
It is supported by the C9xx cores from Alibaba T-Head (found in the wild
in, e.g., the Allwinner D1).
The current (as of this commit) public documentation for XTHeadBb is
available from:
https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf
Support for these instructions has already landed in GNU Binutils:
https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=8254c3d2c94ae5458095ea6c25446ba89134b9da
Depends on D143036
Differential Revision: https://reviews.llvm.org/D143439
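A quick usage sketch, mirroring the Clang tests added in this commit: with
XTHeadBb enabled, __builtin_riscv_clz_32/__builtin_riscv_clz_64 no longer
require Zbb and lower to llvm.ctlz, which the backend then selects to the
th.ff1/th.ff0 instructions. The file name and -march string below are
assumptions (vendor extensions are normally appended to the base ISA
string), not something this commit documents:

// clz-xtheadbb.c (hypothetical file name)
// Assumed build command:
//   clang --target=riscv64 -march=rv64gc_xtheadbb -O2 -S clz-xtheadbb.c

int clz_32(int a) {
  // llvm.ctlz.i32: selected to th.ff1 on RV32, or not+slli+th.ff0 on RV64
  return __builtin_riscv_clz_32(a);
}

long clz_64(long a) {
  // RV64 only: llvm.ctlz.i64, selected to th.ff1
  return __builtin_riscv_clz_64(a);
}

long clo_64(long a) {
  // "count leading ones" idiom; matches the (ctlz (xor x, -1)) -> th.ff0 pattern
  return __builtin_riscv_clz_64(~a);
}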
Added:
clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
Modified:
clang/include/clang/Basic/BuiltinsRISCV.def
llvm/docs/RISCVUsage.rst
llvm/lib/Support/RISCVISAInfo.cpp
llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
llvm/test/CodeGen/RISCV/imm.ll
llvm/test/CodeGen/RISCV/rotl-rotr.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def
index c26e3b8073703..3ca7654a32adc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.def
+++ b/clang/include/clang/Basic/BuiltinsRISCV.def
@@ -18,8 +18,8 @@
// Zbb extension
TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb")
TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit")
-TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb")
-TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb,64bit")
+TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb|xtheadbb")
+TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb|xtheadbb,64bit")
TARGET_BUILTIN(__builtin_riscv_ctz_32, "ZiZi", "nc", "zbb")
TARGET_BUILTIN(__builtin_riscv_ctz_64, "WiWi", "nc", "zbb,64bit")
diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
new file mode 100644
index 0000000000000..915dd806d2179
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c
@@ -0,0 +1,28 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +xtheadbb -emit-llvm %s -o - \
+// RUN: | FileCheck %s -check-prefix=RV32XTHEADBB
+
+// RV32XTHEADBB-LABEL: @clz_32(
+// RV32XTHEADBB-NEXT: entry:
+// RV32XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// RV32XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+// RV32XTHEADBB-NEXT: ret i32 [[TMP1]]
+//
+int clz_32(int a) {
+ return __builtin_riscv_clz_32(a);
+}
+
+// RV32XTHEADBB-LABEL: @clo_32(
+// RV32XTHEADBB-NEXT: entry:
+// RV32XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// RV32XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV32XTHEADBB-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1
+// RV32XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false)
+// RV32XTHEADBB-NEXT: ret i32 [[TMP1]]
+//
+int clo_32(int a) {
+ return __builtin_riscv_clz_32(~a);
+}
diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
new file mode 100644
index 0000000000000..3b6ef569e6b85
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c
@@ -0,0 +1,53 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadbb -emit-llvm %s -o - \
+// RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
+
+// RV64XTHEADBB-LABEL: @clz_32(
+// RV64XTHEADBB-NEXT: entry:
+// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// RV64XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+// RV64XTHEADBB-NEXT: ret i32 [[TMP1]]
+//
+int clz_32(int a) {
+ return __builtin_riscv_clz_32(a);
+}
+
+// RV64XTHEADBB-LABEL: @clo_32(
+// RV64XTHEADBB-NEXT: entry:
+// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// RV64XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// RV64XTHEADBB-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1
+// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false)
+// RV64XTHEADBB-NEXT: ret i32 [[TMP1]]
+//
+int clo_32(int a) {
+ return __builtin_riscv_clz_32(~a);
+}
+
+// RV64XTHEADBB-LABEL: @clz_64(
+// RV64XTHEADBB-NEXT: entry:
+// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// RV64XTHEADBB-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP0]], i1 false)
+// RV64XTHEADBB-NEXT: ret i64 [[TMP1]]
+//
+long clz_64(long a) {
+ return __builtin_riscv_clz_64(a);
+}
+
+// RV64XTHEADBB-LABEL: @clo_64(
+// RV64XTHEADBB-NEXT: entry:
+// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// RV64XTHEADBB-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// RV64XTHEADBB-NEXT: [[NOT:%.*]] = xor i64 [[TMP0]], -1
+// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[NOT]], i1 false)
+// RV64XTHEADBB-NEXT: ret i64 [[TMP1]]
+//
+long clo_64(long a) {
+ return __builtin_riscv_clz_64(~a);
+}
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index a81896d89f0d2..512af92ec8d9c 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -172,6 +172,9 @@ The current vendor extensions supported are:
``XTHeadBa``
LLVM implements `the THeadBa (address-generation) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
+``XTHeadBb``
+ LLVM implements `the THeadBb (basic bit-manipulation) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
+
``XTHeadBs``
LLVM implements `the THeadBs (single-bit operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 92c15885b225a..70095d0836719 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -110,6 +110,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
// vendor-defined ('X') extensions
{"xtheadba", RISCVExtensionVersion{1, 0}},
+ {"xtheadbb", RISCVExtensionVersion{1, 0}},
{"xtheadbs", RISCVExtensionVersion{1, 0}},
{"xtheadvdot", RISCVExtensionVersion{1, 0}},
{"xventanacondops", RISCVExtensionVersion{1, 0}},
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7276d8240520b..647607a6664da 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -478,6 +478,13 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Result != MCDisassembler::Fail)
return Result;
}
+ if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBb]) {
+ LLVM_DEBUG(dbgs() << "Trying XTHeadBb custom opcode table:\n");
+ Result = decodeInstruction(DecoderTableTHeadBb32, MI, Insn, Address, this,
+ STI);
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ }
if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBs]) {
LLVM_DEBUG(dbgs() << "Trying XTHeadBs custom opcode table:\n");
Result = decodeInstruction(DecoderTableTHeadBs32, MI, Insn, Address, this,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index bc2f6683392c7..57908723530de 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -352,15 +352,20 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
}
- // Perform optimization with rori in the Zbb extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
+ // Perform optimization with rori in the Zbb and th.srri in the XTheadBb
+ // extension.
+ if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
+ ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
if (unsigned Rotate = extractRotateInfo(Val)) {
RISCVMatInt::InstSeq TmpSeq;
uint64_t NegImm12 =
((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
assert(isInt<12>(NegImm12));
TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
- TmpSeq.emplace_back(RISCV::RORI, Rotate);
+ TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
+ ? RISCV::RORI
+ : RISCV::TH_SRRI,
+ Rotate);
Res = TmpSeq;
}
}
@@ -405,6 +410,7 @@ OpndKind Inst::getOpndKind() const {
case RISCV::RORI:
case RISCV::BSETI:
case RISCV::BCLRI:
+ case RISCV::TH_SRRI:
return RISCVMatInt::RegImm;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 838056f391c9a..e6cd3341198bf 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -470,6 +470,13 @@ def HasVendorXTHeadBa : Predicate<"Subtarget->hasVendorXTHeadBa()">,
AssemblerPredicate<(all_of FeatureVendorXTHeadBa),
"'xtheadba' (T-Head address calculation instructions)">;
+def FeatureVendorXTHeadBb
+ : SubtargetFeature<"xtheadbb", "HasVendorXTHeadBb", "true",
+ "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+def HasVendorXTHeadBb : Predicate<"Subtarget->hasVendorXTHeadBb()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadBb),
+ "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+
def FeatureVendorXTHeadBs
: SubtargetFeature<"xtheadbs", "HasVendorXTHeadBs", "true",
"'xtheadbs' (T-Head single-bit instructions)">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ec7c4608f16cc..eb6b5c3db962f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1115,11 +1115,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
bool IsANDIOrZExt =
isInt<12>(C2) ||
(C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
+ // With XTHeadBb, we can use TH.EXTU.
+ IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
break;
// If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
// the constant is a simm32.
bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
+ // With XTHeadBb, we can use TH.EXTU.
+ IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
break;
@@ -2397,6 +2401,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
case RISCV::FCVT_S_WU:
case RISCV::FCVT_D_W:
case RISCV::FCVT_D_WU:
+ case RISCV::TH_REVW:
+ case RISCV::TH_SRRIW:
if (Bits < 32)
return false;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c2abb5cd4cb54..a2d89a97cffec 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -233,7 +233,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZbb())
+ if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
if (Subtarget.is64Bit()) {
@@ -280,7 +280,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
Custom);
- if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else {
@@ -290,7 +291,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
- (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
// Zbkb can use rev8+brev8 to implement bitreverse.
@@ -309,6 +311,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
}
+ if (Subtarget.hasVendorXTHeadBb()) {
+ setOperationAction({ISD::CTLZ}, XLenVT, Legal);
+
+ // We need the custom lowering to make sure that the resulting sequence
+ // for the 32bit case is efficient on 64bit targets.
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS, MVT::i32, Custom);
@@ -1212,7 +1223,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
- return Subtarget.hasStdExtZbb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 8f6690ac2c403..a994220b34a0d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -54,6 +54,38 @@ class THShiftALU_rri<bits<3> funct3, string opcodestr>
let Inst{26-25} = uimm2;
}
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
+ : RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$shamt),
+ opcodestr, "$rd, $rs1, $shamt">;
+
+class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
+ opcodestr, "$rd, $rs1, $msb, $lsb"> {
+ bits<6> msb;
+ bits<6> lsb;
+ let Inst{31-26} = msb;
+ let Inst{25-20} = lsb;
+}
+
+class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
+ : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1"> {
+ let rs3 = funct5;
+ let rs2 = 0;
+}
+}
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimm5:$shamt),
+ opcodestr, "$rd, $rs1, $shamt">;
+
//===----------------------------------------------------------------------===//
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
@@ -75,6 +107,21 @@ def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
} // Predicates = [HasVendorXTHeadBa]
+let Predicates = [HasVendorXTHeadBb] in {
+def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
+def TH_EXT : THBitfieldExtract_rii<0b010, "th.ext">;
+def TH_EXTU : THBitfieldExtract_rii<0b011, "th.extu">;
+def TH_FF0 : THRev_r<0b10000, 0b10, "th.ff0">;
+def TH_FF1 : THRev_r<0b10000, 0b11, "th.ff1">;
+def TH_REV : THRev_r<0b10000, 0b01, "th.rev">;
+def TH_TSTNBZ : THRev_r<0b10000, 0b00, "th.tstnbz">;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
+def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
let IsSignExtendingOpW = 1 in
def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
@@ -169,6 +216,49 @@ def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
} // Predicates = [HasVendorXTHeadBa]
+let Predicates = [HasVendorXTHeadBb] in {
+def : PatGprImm<rotr, TH_SRRI, uimmlog2xlen>;
+// There's no encoding for a rotate-left-immediate in X-THead-Bb, as
+// it can be implemented with th.srri by negating the immediate.
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+ (TH_SRRI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2),
+ (OR (SRL GPR:$rs1, GPR:$rs2),
+ (SLL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+def : Pat<(rotl GPR:$rs1, GPR:$rs2),
+ (OR (SLL GPR:$rs1, GPR:$rs2),
+ (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
+//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
+def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i1), (TH_EXT GPR:$rs1, 0, 0)>;
+def : PatGpr<ctlz, TH_FF1>;
+def : Pat<(ctlz (xor GPR:$rs1, -1)), (TH_FF0 GPR:$rs1)>;
+def : PatGpr<bswap, TH_REV>;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64] in {
+def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+ (TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+def : Pat<(riscv_rorw i64:$rs1, i64:$rs2),
+ (OR (SRLW i64:$rs1, i64:$rs2),
+ (SLLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(riscv_rolw i64:$rs1, i64:$rs2),
+ (OR (SLLW i64:$rs1, i64:$rs2),
+ (SRLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(sra (bswap i64:$rs1), (i64 32)),
+ (TH_REVW i64:$rs1)>;
+def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
+ (TH_REVW i64:$rs1)>;
+def : Pat<(riscv_clzw i64:$rs1),
+ (TH_FF0 (SLLI (XORI i64:$rs1, -1), 32))>;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
let Predicates = [HasVendorXTHeadBs] in {
def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), 1),
(TH_TST GPR:$rs1, uimmlog2xlen:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index e7a1ea0e1bdff..f251c9808f05c 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -11,6 +11,10 @@
; RUN: | FileCheck %s -check-prefix=RV32ZBB
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
@@ -83,6 +87,38 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
; RV64ZBB-NEXT: ori a0, a0, 256
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i8:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a1, a0, 255
+; RV32XTHEADBB-NEXT: beqz a1, .LBB0_2
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB0_2:
+; RV32XTHEADBB-NEXT: li a0, 8
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i8:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a1, a0, 255
+; RV64XTHEADBB-NEXT: beqz a1, .LBB0_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB0_2:
+; RV64XTHEADBB-NEXT: li a0, 8
+; RV64XTHEADBB-NEXT: ret
%tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
ret i8 %tmp
}
@@ -161,6 +197,38 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
; RV64ZBB-NEXT: or a0, a0, a1
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i16:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a1, a0, 16
+; RV32XTHEADBB-NEXT: beqz a1, .LBB1_2
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB1_2:
+; RV32XTHEADBB-NEXT: li a0, 16
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i16:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: slli a1, a0, 48
+; RV64XTHEADBB-NEXT: beqz a1, .LBB1_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB1_2:
+; RV64XTHEADBB-NEXT: li a0, 16
+; RV64XTHEADBB-NEXT: ret
%tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
ret i16 %tmp
}
@@ -261,6 +329,37 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctzw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB2_2:
+; RV32XTHEADBB-NEXT: li a0, 32
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sext.w a1, a0
+; RV64XTHEADBB-NEXT: beqz a1, .LBB2_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB2_2:
+; RV64XTHEADBB-NEXT: li a0, 32
+; RV64XTHEADBB-NEXT: ret
%tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %tmp
}
@@ -408,6 +507,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: addi a0, a1, -1
+; RV32XTHEADBB-NEXT: not a1, a1
+; RV32XTHEADBB-NEXT: and a0, a1, a0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 64
+; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: beqz a0, .LBB3_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB3_2:
+; RV64XTHEADBB-NEXT: li a0, 64
+; RV64XTHEADBB-NEXT: ret
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
ret i64 %tmp
}
@@ -456,6 +591,26 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i8_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i8_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true)
ret i8 %tmp
}
@@ -518,6 +673,26 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i16_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i16_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
ret i16 %tmp
}
@@ -596,6 +771,26 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctzw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i32_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i32_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true)
ret i32 %tmp
}
@@ -723,6 +918,37 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_cttz_i64_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a0, .LBB7_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: addi a0, a1, -1
+; RV32XTHEADBB-NEXT: not a1, a1
+; RV32XTHEADBB-NEXT: and a0, a1, a0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 64
+; RV32XTHEADBB-NEXT: j .LBB7_3
+; RV32XTHEADBB-NEXT: .LBB7_2:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: .LBB7_3:
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_cttz_i64_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true)
ret i64 %tmp
}
@@ -801,6 +1027,20 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: addi a0, a0, -56
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i8:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a0, a0, 255
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, -24
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i8:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a0, a0, 255
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -56
+; RV64XTHEADBB-NEXT: ret
%tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
ret i8 %tmp
}
@@ -897,6 +1137,20 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: addi a0, a0, -48
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i16:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, -16
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i16:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -48
+; RV64XTHEADBB-NEXT: ret
%tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
ret i16 %tmp
}
@@ -1081,6 +1335,18 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %tmp
}
@@ -1344,6 +1610,24 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a1, .LBB11_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, 32
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB11_2:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a1
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
ret i64 %tmp
}
@@ -1410,6 +1694,20 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind {
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: addi a0, a0, -56
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a0, a0, 255
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, -24
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a0, a0, 255
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -56
+; RV64XTHEADBB-NEXT: ret
%tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
ret i8 %tmp
}
@@ -1496,6 +1794,20 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: addi a0, a0, -48
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, -16
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -48
+; RV64XTHEADBB-NEXT: ret
%tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
ret i16 %tmp
}
@@ -1658,6 +1970,18 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i32_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i32_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
ret i32 %tmp
}
@@ -1911,6 +2235,24 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: clz a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctlz_i64_zero_undef:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a1, .LBB15_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, 32
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB15_2:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a1
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctlz_i64_zero_undef:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: ret
%tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
ret i64 %tmp
}
@@ -1955,6 +2297,34 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
; RV64ZBB-NEXT: andi a0, a0, 255
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i8:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: andi a1, a1, 85
+; RV32XTHEADBB-NEXT: sub a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a1, a0, 51
+; RV32XTHEADBB-NEXT: srli a0, a0, 2
+; RV32XTHEADBB-NEXT: andi a0, a0, 51
+; RV32XTHEADBB-NEXT: add a0, a1, a0
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 15
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i8:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: andi a1, a1, 85
+; RV64XTHEADBB-NEXT: subw a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a1, a0, 51
+; RV64XTHEADBB-NEXT: srli a0, a0, 2
+; RV64XTHEADBB-NEXT: andi a0, a0, 51
+; RV64XTHEADBB-NEXT: add a0, a1, a0
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 15
+; RV64XTHEADBB-NEXT: ret
%1 = call i8 @llvm.ctpop.i8(i8 %a)
ret i8 %1
}
@@ -2013,6 +2383,48 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i16:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: lui a2, 5
+; RV32XTHEADBB-NEXT: addi a2, a2, 1365
+; RV32XTHEADBB-NEXT: and a1, a1, a2
+; RV32XTHEADBB-NEXT: sub a0, a0, a1
+; RV32XTHEADBB-NEXT: lui a1, 3
+; RV32XTHEADBB-NEXT: addi a1, a1, 819
+; RV32XTHEADBB-NEXT: and a2, a0, a1
+; RV32XTHEADBB-NEXT: srli a0, a0, 2
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: add a0, a2, a0
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a1, a0, 15
+; RV32XTHEADBB-NEXT: slli a0, a0, 20
+; RV32XTHEADBB-NEXT: srli a0, a0, 28
+; RV32XTHEADBB-NEXT: add a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i16:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: lui a2, 5
+; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
+; RV64XTHEADBB-NEXT: and a1, a1, a2
+; RV64XTHEADBB-NEXT: sub a0, a0, a1
+; RV64XTHEADBB-NEXT: lui a1, 3
+; RV64XTHEADBB-NEXT: addiw a1, a1, 819
+; RV64XTHEADBB-NEXT: and a2, a0, a1
+; RV64XTHEADBB-NEXT: srli a0, a0, 2
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: add a0, a2, a0
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a1, a0, 15
+; RV64XTHEADBB-NEXT: slli a0, a0, 52
+; RV64XTHEADBB-NEXT: srli a0, a0, 60
+; RV64XTHEADBB-NEXT: add a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %1
}
@@ -2131,6 +2543,62 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi sp, sp, -16
+; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: lui a2, 349525
+; RV32XTHEADBB-NEXT: addi a2, a2, 1365
+; RV32XTHEADBB-NEXT: and a1, a1, a2
+; RV32XTHEADBB-NEXT: sub a0, a0, a1
+; RV32XTHEADBB-NEXT: lui a1, 209715
+; RV32XTHEADBB-NEXT: addi a1, a1, 819
+; RV32XTHEADBB-NEXT: and a2, a0, a1
+; RV32XTHEADBB-NEXT: srli a0, a0, 2
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: add a0, a2, a0
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: lui a1, 61681
+; RV32XTHEADBB-NEXT: addi a1, a1, -241
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: lui a1, 4112
+; RV32XTHEADBB-NEXT: addi a1, a1, 257
+; RV32XTHEADBB-NEXT: call __mulsi3@plt
+; RV32XTHEADBB-NEXT: srli a0, a0, 24
+; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: addi sp, sp, 16
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi sp, sp, -16
+; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: lui a2, 349525
+; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
+; RV64XTHEADBB-NEXT: and a1, a1, a2
+; RV64XTHEADBB-NEXT: sub a0, a0, a1
+; RV64XTHEADBB-NEXT: lui a1, 209715
+; RV64XTHEADBB-NEXT: addiw a1, a1, 819
+; RV64XTHEADBB-NEXT: and a2, a0, a1
+; RV64XTHEADBB-NEXT: srli a0, a0, 2
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: add a0, a2, a0
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: lui a1, 61681
+; RV64XTHEADBB-NEXT: addiw a1, a1, -241
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: lui a1, 4112
+; RV64XTHEADBB-NEXT: addiw a1, a1, 257
+; RV64XTHEADBB-NEXT: call __muldi3@plt
+; RV64XTHEADBB-NEXT: srliw a0, a0, 24
+; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT: addi sp, sp, 16
+; RV64XTHEADBB-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
}
@@ -2295,6 +2763,91 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_ctpop_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi sp, sp, -32
+; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32XTHEADBB-NEXT: mv s0, a0
+; RV32XTHEADBB-NEXT: srli a0, a1, 1
+; RV32XTHEADBB-NEXT: lui a2, 349525
+; RV32XTHEADBB-NEXT: addi s2, a2, 1365
+; RV32XTHEADBB-NEXT: and a0, a0, s2
+; RV32XTHEADBB-NEXT: sub a1, a1, a0
+; RV32XTHEADBB-NEXT: lui a0, 209715
+; RV32XTHEADBB-NEXT: addi s3, a0, 819
+; RV32XTHEADBB-NEXT: and a0, a1, s3
+; RV32XTHEADBB-NEXT: srli a1, a1, 2
+; RV32XTHEADBB-NEXT: and a1, a1, s3
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: lui a1, 61681
+; RV32XTHEADBB-NEXT: addi s4, a1, -241
+; RV32XTHEADBB-NEXT: and a0, a0, s4
+; RV32XTHEADBB-NEXT: lui a1, 4112
+; RV32XTHEADBB-NEXT: addi s1, a1, 257
+; RV32XTHEADBB-NEXT: mv a1, s1
+; RV32XTHEADBB-NEXT: call __mulsi3@plt
+; RV32XTHEADBB-NEXT: srli s5, a0, 24
+; RV32XTHEADBB-NEXT: srli a0, s0, 1
+; RV32XTHEADBB-NEXT: and a0, a0, s2
+; RV32XTHEADBB-NEXT: sub s0, s0, a0
+; RV32XTHEADBB-NEXT: and a0, s0, s3
+; RV32XTHEADBB-NEXT: srli s0, s0, 2
+; RV32XTHEADBB-NEXT: and a1, s0, s3
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: and a0, a0, s4
+; RV32XTHEADBB-NEXT: mv a1, s1
+; RV32XTHEADBB-NEXT: call __mulsi3@plt
+; RV32XTHEADBB-NEXT: srli a0, a0, 24
+; RV32XTHEADBB-NEXT: add a0, a0, s5
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32XTHEADBB-NEXT: addi sp, sp, 32
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_ctpop_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi sp, sp, -16
+; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_0)
+; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_0)(a1)
+; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_1)
+; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_1)(a2)
+; RV64XTHEADBB-NEXT: srli a3, a0, 1
+; RV64XTHEADBB-NEXT: and a1, a3, a1
+; RV64XTHEADBB-NEXT: sub a0, a0, a1
+; RV64XTHEADBB-NEXT: and a1, a0, a2
+; RV64XTHEADBB-NEXT: srli a0, a0, 2
+; RV64XTHEADBB-NEXT: and a0, a0, a2
+; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_2)
+; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_2)(a2)
+; RV64XTHEADBB-NEXT: add a0, a1, a0
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: and a0, a0, a2
+; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_3)
+; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_3)(a1)
+; RV64XTHEADBB-NEXT: call __muldi3@plt
+; RV64XTHEADBB-NEXT: srli a0, a0, 56
+; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT: addi sp, sp, 16
+; RV64XTHEADBB-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %1
}
@@ -2337,6 +2890,30 @@ define i8 @test_parity_i8(i8 %a) {
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: andi a0, a0, 1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i8:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a0, a0, 255
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 2
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i8:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a0, a0, 255
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 2
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 1
+; RV64XTHEADBB-NEXT: ret
%1 = call i8 @llvm.ctpop.i8(i8 %a)
%2 = and i8 %1, 1
ret i8 %2
@@ -2386,6 +2963,34 @@ define i16 @test_parity_i16(i16 %a) {
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: andi a0, a0, 1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i16:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: srli a1, a0, 8
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 2
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i16:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: srli a1, a0, 8
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 2
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 1
+; RV64XTHEADBB-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
%2 = and i16 %1, 1
ret i16 %2
@@ -2435,6 +3040,37 @@ define i32 @test_parity_i32(i32 %a) {
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: andi a0, a0, 1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srli a1, a0, 16
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 8
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 2
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT: srliw a0, a0, 16
+; RV64XTHEADBB-NEXT: xor a0, a1, a0
+; RV64XTHEADBB-NEXT: srli a1, a0, 8
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 2
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 1
+; RV64XTHEADBB-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
%2 = and i32 %1, 1
ret i32 %2
@@ -2488,6 +3124,40 @@ define i64 @test_parity_i64(i64 %a) {
; RV64ZBB-NEXT: cpop a0, a0
; RV64ZBB-NEXT: andi a0, a0, 1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: test_parity_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 16
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 8
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 4
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 2
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: srli a1, a0, 1
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: andi a0, a0, 1
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: test_parity_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srli a1, a0, 32
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 16
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 8
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 4
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 2
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: srli a1, a0, 1
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: andi a0, a0, 1
+; RV64XTHEADBB-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
%2 = and i64 %1, 1
ret i64 %2
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 15ea6cbd33471..45366798bf9af 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -9,6 +9,8 @@
; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBB
; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbs \
; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \
+; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB
; Materializing constants
@@ -41,6 +43,11 @@ define signext i32 @zero() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: li a0, 0
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: zero:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 0
+; RV64IXTHEADBB-NEXT: ret
ret i32 0
}
@@ -69,6 +76,11 @@ define signext i32 @pos_small() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: li a0, 2047
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: pos_small:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 2047
+; RV64IXTHEADBB-NEXT: ret
ret i32 2047
}
@@ -97,6 +109,11 @@ define signext i32 @neg_small() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: li a0, -2048
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: neg_small:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -2048
+; RV64IXTHEADBB-NEXT: ret
ret i32 -2048
}
@@ -130,6 +147,12 @@ define signext i32 @pos_i32() nounwind {
; RV64IZBS-NEXT: lui a0, 423811
; RV64IZBS-NEXT: addiw a0, a0, -1297
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 423811
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1297
+; RV64IXTHEADBB-NEXT: ret
ret i32 1735928559
}
@@ -163,6 +186,12 @@ define signext i32 @neg_i32() nounwind {
; RV64IZBS-NEXT: lui a0, 912092
; RV64IZBS-NEXT: addiw a0, a0, -273
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 912092
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT: ret
ret i32 -559038737
}
@@ -191,6 +220,11 @@ define signext i32 @pos_i32_hi20_only() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 16
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32_hi20_only:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 16
+; RV64IXTHEADBB-NEXT: ret
ret i32 65536 ; 0x10000
}
@@ -219,6 +253,11 @@ define signext i32 @neg_i32_hi20_only() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 1048560
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32_hi20_only:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048560
+; RV64IXTHEADBB-NEXT: ret
ret i32 -65536 ; -0x10000
}
@@ -254,6 +293,12 @@ define signext i32 @imm_left_shifted_addi() nounwind {
; RV64IZBS-NEXT: lui a0, 32
; RV64IZBS-NEXT: addiw a0, a0, -64
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_addi:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 32
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -64
+; RV64IXTHEADBB-NEXT: ret
ret i32 131008 ; 0x1FFC0
}
@@ -289,6 +334,12 @@ define signext i32 @imm_right_shifted_addi() nounwind {
; RV64IZBS-NEXT: lui a0, 524288
; RV64IZBS-NEXT: addiw a0, a0, -1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_addi:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 524288
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1
+; RV64IXTHEADBB-NEXT: ret
ret i32 2147483647 ; 0x7FFFFFFF
}
@@ -324,6 +375,12 @@ define signext i32 @imm_right_shifted_lui() nounwind {
; RV64IZBS-NEXT: lui a0, 56
; RV64IZBS-NEXT: addiw a0, a0, 580
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 56
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 580
+; RV64IXTHEADBB-NEXT: ret
ret i32 229956 ; 0x38244
}
@@ -356,6 +413,12 @@ define i64 @imm64_1() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: bseti a0, zero, 31
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 31
+; RV64IXTHEADBB-NEXT: ret
ret i64 2147483648 ; 0x8000_0000
}
@@ -389,6 +452,12 @@ define i64 @imm64_2() nounwind {
; RV64IZBS-NEXT: li a0, -1
; RV64IZBS-NEXT: srli a0, a0, 32
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_2:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: srli a0, a0, 32
+; RV64IXTHEADBB-NEXT: ret
ret i64 4294967295 ; 0xFFFF_FFFF
}
@@ -421,6 +490,12 @@ define i64 @imm64_3() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: bseti a0, zero, 32
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_3:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 32
+; RV64IXTHEADBB-NEXT: ret
ret i64 4294967296 ; 0x1_0000_0000
}
@@ -453,6 +528,12 @@ define i64 @imm64_4() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: bseti a0, zero, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_4:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 63
+; RV64IXTHEADBB-NEXT: ret
ret i64 9223372036854775808 ; 0x8000_0000_0000_0000
}
@@ -485,6 +566,12 @@ define i64 @imm64_5() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: bseti a0, zero, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_5:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 63
+; RV64IXTHEADBB-NEXT: ret
ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000
}
@@ -523,6 +610,13 @@ define i64 @imm64_6() nounwind {
; RV64IZBS-NEXT: addiw a0, a0, -1329
; RV64IZBS-NEXT: slli a0, a0, 35
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_6:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 9321
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1329
+; RV64IXTHEADBB-NEXT: slli a0, a0, 35
+; RV64IXTHEADBB-NEXT: ret
ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
}
@@ -569,6 +663,15 @@ define i64 @imm64_7() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 24
; RV64IZBS-NEXT: addi a0, a0, 15
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_7:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 7
+; RV64IXTHEADBB-NEXT: slli a0, a0, 36
+; RV64IXTHEADBB-NEXT: addi a0, a0, 11
+; RV64IXTHEADBB-NEXT: slli a0, a0, 24
+; RV64IXTHEADBB-NEXT: addi a0, a0, 15
+; RV64IXTHEADBB-NEXT: ret
ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F
}
@@ -629,6 +732,18 @@ define i64 @imm64_8() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 13
; RV64IZBS-NEXT: addi a0, a0, -272
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_8:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 583
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1875
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -947
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1511
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, -272
+; RV64IXTHEADBB-NEXT: ret
ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0
}
@@ -658,6 +773,11 @@ define i64 @imm64_9() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: li a0, -1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_9:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -1
}
@@ -694,6 +814,12 @@ define i64 @imm_left_shifted_lui_1() nounwind {
; RV64IZBS-NEXT: lui a0, 262145
; RV64IZBS-NEXT: slli a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 262145
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 2147491840 ; 0x8000_2000
}
@@ -727,6 +853,12 @@ define i64 @imm_left_shifted_lui_2() nounwind {
; RV64IZBS-NEXT: lui a0, 262145
; RV64IZBS-NEXT: slli a0, a0, 2
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_2:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 262145
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: ret
ret i64 4294983680 ; 0x1_0000_4000
}
@@ -761,6 +893,12 @@ define i64 @imm_left_shifted_lui_3() nounwind {
; RV64IZBS-NEXT: lui a0, 4097
; RV64IZBS-NEXT: slli a0, a0, 20
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_3:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 4097
+; RV64IXTHEADBB-NEXT: slli a0, a0, 20
+; RV64IXTHEADBB-NEXT: ret
ret i64 17596481011712 ; 0x1001_0000_0000
}
@@ -799,6 +937,12 @@ define i64 @imm_right_shifted_lui_1() nounwind {
; RV64IZBS-NEXT: lui a0, 983056
; RV64IZBS-NEXT: srli a0, a0, 16
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 983056
+; RV64IXTHEADBB-NEXT: srli a0, a0, 16
+; RV64IXTHEADBB-NEXT: ret
ret i64 281474976706561 ; 0xFFFF_FFFF_F001
}
@@ -837,6 +981,13 @@ define i64 @imm_right_shifted_lui_2() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: srli a0, a0, 24
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_2:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1044481
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: srli a0, a0, 24
+; RV64IXTHEADBB-NEXT: ret
ret i64 1099511623681 ; 0xFF_FFFF_F001
}
@@ -877,6 +1028,13 @@ define i64 @imm_decoupled_lui_addi() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 20
; RV64IZBS-NEXT: addi a0, a0, -3
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_decoupled_lui_addi:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 4097
+; RV64IXTHEADBB-NEXT: slli a0, a0, 20
+; RV64IXTHEADBB-NEXT: addi a0, a0, -3
+; RV64IXTHEADBB-NEXT: ret
ret i64 17596481011709 ; 0x1000_FFFF_FFFD
}
@@ -925,6 +1083,15 @@ define i64 @imm_end_xori_1() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 25
; RV64IZBS-NEXT: addi a0, a0, -1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_xori_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 36
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 25
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
}
@@ -974,6 +1141,15 @@ define i64 @imm_end_2addi_1() nounwind {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 2047
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_2addi_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -2047
+; RV64IXTHEADBB-NEXT: slli a0, a0, 27
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 2047
+; RV64IXTHEADBB-NEXT: ret
ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
}
@@ -1030,6 +1206,17 @@ define i64 @imm_2reg_1() nounwind {
; RV64IZBS-NEXT: bseti a0, a0, 62
; RV64IZBS-NEXT: bseti a0, a0, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2reg_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 35
+; RV64IXTHEADBB-NEXT: addi a0, a0, 9
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, 837
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT: ret
ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
}
@@ -1064,6 +1251,12 @@ define void @imm_store_i16_neg1(ptr %p) nounwind {
; RV64IZBS-NEXT: li a1, -1
; RV64IZBS-NEXT: sh a1, 0(a0)
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i16_neg1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a1, -1
+; RV64IXTHEADBB-NEXT: sh a1, 0(a0)
+; RV64IXTHEADBB-NEXT: ret
store i16 -1, ptr %p
ret void
}
@@ -1099,6 +1292,12 @@ define void @imm_store_i32_neg1(ptr %p) nounwind {
; RV64IZBS-NEXT: li a1, -1
; RV64IZBS-NEXT: sw a1, 0(a0)
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i32_neg1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a1, -1
+; RV64IXTHEADBB-NEXT: sw a1, 0(a0)
+; RV64IXTHEADBB-NEXT: ret
store i32 -1, ptr %p
ret void
}
@@ -1140,6 +1339,14 @@ define i64 @imm_5372288229() {
; RV64IZBS-NEXT: addiw a0, a0, -795
; RV64IZBS-NEXT: bseti a0, a0, 32
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_5372288229:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 160
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 437
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, -795
+; RV64IXTHEADBB-NEXT: ret
ret i64 5372288229
}
@@ -1180,6 +1387,14 @@ define i64 @imm_neg_5372288229() {
; RV64IZBS-NEXT: addiw a0, a0, 795
; RV64IZBS-NEXT: bclri a0, a0, 32
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5372288229:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048416
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -437
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, 795
+; RV64IXTHEADBB-NEXT: ret
ret i64 -5372288229
}
@@ -1220,6 +1435,14 @@ define i64 @imm_8953813715() {
; RV64IZBS-NEXT: addiw a0, a0, -1325
; RV64IZBS-NEXT: bseti a0, a0, 33
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_8953813715:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 267
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -637
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1325
+; RV64IXTHEADBB-NEXT: ret
ret i64 8953813715
}
@@ -1260,6 +1483,14 @@ define i64 @imm_neg_8953813715() {
; RV64IZBS-NEXT: addiw a0, a0, 1325
; RV64IZBS-NEXT: bclri a0, a0, 33
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8953813715:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048309
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 637
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1325
+; RV64IXTHEADBB-NEXT: ret
ret i64 -8953813715
}
@@ -1301,6 +1532,14 @@ define i64 @imm_16116864687() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 1711
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_16116864687:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 961
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1475
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1711
+; RV64IXTHEADBB-NEXT: ret
ret i64 16116864687
}
@@ -1342,6 +1581,14 @@ define i64 @imm_neg_16116864687() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, -1711
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_16116864687:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1047615
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1475
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1711
+; RV64IXTHEADBB-NEXT: ret
ret i64 -16116864687
}
@@ -1380,6 +1627,13 @@ define i64 @imm_2344336315() {
; RV64IZBS-NEXT: slli a0, a0, 2
; RV64IZBS-NEXT: addi a0, a0, -1093
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2344336315:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 143087
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 2344336315 ; 0x8bbbbbbb
}
@@ -1427,6 +1681,16 @@ define i64 @imm_70370820078523() {
; RV64IZBS-NEXT: addiw a0, a0, -1093
; RV64IZBS-NEXT: bseti a0, a0, 46
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_70370820078523:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 256
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 70370820078523 ; 0x40007bbbbbbb
}
@@ -1477,6 +1741,17 @@ define i64 @imm_neg_9223372034778874949() {
; RV64IZBS-NEXT: addiw a0, a0, -1093
; RV64IZBS-NEXT: bseti a0, a0, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 37
+; RV64IXTHEADBB-NEXT: addi a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
}
@@ -1528,6 +1803,17 @@ define i64 @imm_neg_9223301666034697285() {
; RV64IZBS-NEXT: bseti a0, a0, 46
; RV64IZBS-NEXT: bseti a0, a0, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223301666034697285:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 917505
+; RV64IXTHEADBB-NEXT: slli a0, a0, 8
+; RV64IXTHEADBB-NEXT: addi a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
}
@@ -1566,6 +1852,13 @@ define i64 @imm_neg_2219066437() {
; RV64IZBS-NEXT: slli a0, a0, 2
; RV64IZBS-NEXT: addi a0, a0, -1093
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2219066437:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 913135
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2219066437 ; 0xffffffff7bbbbbbb
}
@@ -1608,6 +1901,14 @@ define i64 @imm_neg_8798043653189() {
; RV64IZBS-NEXT: addiw a0, a0, -1093
; RV64IZBS-NEXT: bclri a0, a0, 43
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8798043653189:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 917475
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
}
@@ -1653,6 +1954,15 @@ define i64 @imm_9223372034904144827() {
; RV64IZBS-NEXT: addiw a0, a0, -1093
; RV64IZBS-NEXT: bclri a0, a0, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048343
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1911
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT: srli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
}
@@ -1705,6 +2015,17 @@ define i64 @imm_neg_9223354442718100411() {
; RV64IZBS-NEXT: bclri a0, a0, 44
; RV64IZBS-NEXT: bclri a0, a0, 63
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223354442718100411:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 524287
+; RV64IXTHEADBB-NEXT: slli a0, a0, 6
+; RV64IXTHEADBB-NEXT: addi a0, a0, -29
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
}
@@ -1743,6 +2064,13 @@ define i64 @imm_2863311530() {
; RV64IZBS-NEXT: addiw a0, a0, 1365
; RV64IZBS-NEXT: slli a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2863311530:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 349525
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 2863311530 ; #0xaaaaaaaa
}
@@ -1781,6 +2109,13 @@ define i64 @imm_neg_2863311530() {
; RV64IZBS-NEXT: addiw a0, a0, -1365
; RV64IZBS-NEXT: slli a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2863311530:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 699051
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2863311530 ; #0xffffffff55555556
}
@@ -1818,6 +2153,13 @@ define i64 @imm_2147486378() {
; RV64IZBS-NEXT: li a0, 1365
; RV64IZBS-NEXT: bseti a0, a0, 31
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2147486378:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 31
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT: ret
ret i64 2147485013
}
@@ -1852,6 +2194,12 @@ define i64 @imm_neg_2147485013() {
; RV64IZBS-NEXT: lui a0, 524288
; RV64IZBS-NEXT: addi a0, a0, -1365
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2147485013:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 524288
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1365
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2147485013
}
@@ -1894,6 +2242,14 @@ define i64 @imm_12900924131259() {
; RV64IZBS-NEXT: slli a0, a0, 24
; RV64IZBS-NEXT: addi a0, a0, 1979
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900924131259:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 24
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1979
+; RV64IXTHEADBB-NEXT: ret
ret i64 12900924131259
}
@@ -1930,6 +2286,13 @@ define i64 @imm_50394234880() {
; RV64IZBS-NEXT: addiw a0, a0, -1093
; RV64IZBS-NEXT: slli a0, a0, 16
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_50394234880:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 16
+; RV64IXTHEADBB-NEXT: ret
ret i64 50394234880
}
@@ -1976,6 +2339,15 @@ define i64 @imm_12900936431479() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 1911
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900936431479:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 192239
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT: ret
ret i64 12900936431479
}
@@ -2022,6 +2394,15 @@ define i64 @imm_12900918536874() {
; RV64IZBS-NEXT: addi a0, a0, 1365
; RV64IZBS-NEXT: slli a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900918536874:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 384477
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 12900918536874
}
@@ -2071,6 +2452,16 @@ define i64 @imm_12900925247761() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 273
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900925247761:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 273
+; RV64IXTHEADBB-NEXT: ret
ret i64 12900925247761
}
@@ -2112,6 +2503,14 @@ define i64 @imm_7158272001() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_7158272001:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 427
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1367
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 7158272001 ; 0x0000_0001_aaaa_9001
}
@@ -2153,6 +2552,14 @@ define i64 @imm_12884889601() {
; RV64IZBS-NEXT: slli a0, a0, 12
; RV64IZBS-NEXT: addi a0, a0, 1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12884889601:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 768
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -3
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 12884889601 ; 0x0000_0002_ffff_d001
}
@@ -2193,6 +2600,14 @@ define i64 @imm_neg_3435982847() {
; RV64IZBS-NEXT: addiw a0, a0, 1
; RV64IZBS-NEXT: bclri a0, a0, 31
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_3435982847:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048371
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 817
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -3435982847 ; 0xffff_ffff_3333_1001
}
@@ -2233,6 +2648,14 @@ define i64 @imm_neg_5726842879() {
; RV64IZBS-NEXT: addiw a0, a0, 1
; RV64IZBS-NEXT: bclri a0, a0, 32
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5726842879:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048235
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1419
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
}
@@ -2273,6 +2696,14 @@ define i64 @imm_neg_10307948543() {
; RV64IZBS-NEXT: addiw a0, a0, 1
; RV64IZBS-NEXT: bclri a0, a0, 33
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_10307948543:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1047962
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1645
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
ret i64 -10307948543 ; 0xffff_fffd_9999_3001
}
@@ -2310,6 +2741,12 @@ define i64 @li_rori_1() {
; RV64IZBS-NEXT: slli a0, a0, 43
; RV64IZBS-NEXT: addi a0, a0, -1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -18
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 21
+; RV64IXTHEADBB-NEXT: ret
ret i64 -149533581377537
}
@@ -2347,6 +2784,12 @@ define i64 @li_rori_2() {
; RV64IZBS-NEXT: slli a0, a0, 60
; RV64IZBS-NEXT: addi a0, a0, -6
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_2:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -86
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 4
+; RV64IXTHEADBB-NEXT: ret
ret i64 -5764607523034234886
}
@@ -2384,6 +2827,12 @@ define i64 @li_rori_3() {
; RV64IZBS-NEXT: slli a0, a0, 27
; RV64IZBS-NEXT: addi a0, a0, -1
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_3:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -18
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 37
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2281701377
}
@@ -2420,6 +2869,13 @@ define i64 @PR54812() {
; RV64IZBS-NEXT: lui a0, 1045887
; RV64IZBS-NEXT: bclri a0, a0, 31
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: PR54812:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048447
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1407
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: ret
ret i64 -2158497792;
}
@@ -2452,6 +2908,12 @@ define signext i32 @pos_2048() nounwind {
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: bseti a0, zero, 11
; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: pos_2048:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 11
+; RV64IXTHEADBB-NEXT: ret
ret i32 2048
}
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index f735d21775114..132e73e080afb 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -7,6 +7,10 @@
; RUN: | FileCheck %s -check-prefix=RV32ZBB
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
; NOTE: -enable-legalize-types-checking is on one command line due to a previous
; assertion failure on an expensive checks build for @rotr_32_mask_multiple.
@@ -40,6 +44,22 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rolw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 32, %y
%b = shl i32 %x, %y
%c = lshr i32 %x, %z
@@ -73,6 +93,22 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rorw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 32, %y
%b = lshr i32 %x, %y
%c = shl i32 %x, %z
@@ -177,6 +213,56 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rol a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a4, a0, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB2_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB2_3
+; RV32XTHEADBB-NEXT: .LBB2_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB2_3:
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a7, a2
+; RV32XTHEADBB-NEXT: li a5, 32
+; RV32XTHEADBB-NEXT: sub a6, a5, a2
+; RV32XTHEADBB-NEXT: srl a5, a1, a7
+; RV32XTHEADBB-NEXT: bltz a6, .LBB2_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a0, a5
+; RV32XTHEADBB-NEXT: j .LBB2_6
+; RV32XTHEADBB-NEXT: .LBB2_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a7
+; RV32XTHEADBB-NEXT: li a7, 64
+; RV32XTHEADBB-NEXT: sub a2, a7, a2
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB2_6:
+; RV32XTHEADBB-NEXT: slti a1, a6, 0
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: and a1, a1, a5
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i64 64, %y
%b = shl i64 %x, %y
%c = lshr i64 %x, %z
@@ -281,6 +367,56 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ror a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB3_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a7, a2
+; RV32XTHEADBB-NEXT: li a5, 32
+; RV32XTHEADBB-NEXT: sub a6, a5, a2
+; RV32XTHEADBB-NEXT: sll a5, a0, a7
+; RV32XTHEADBB-NEXT: bltz a6, .LBB3_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a1, a5
+; RV32XTHEADBB-NEXT: j .LBB3_6
+; RV32XTHEADBB-NEXT: .LBB3_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a7
+; RV32XTHEADBB-NEXT: li a7, 64
+; RV32XTHEADBB-NEXT: sub a2, a7, a2
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a2
+; RV32XTHEADBB-NEXT: or a1, a1, a0
+; RV32XTHEADBB-NEXT: .LBB3_6:
+; RV32XTHEADBB-NEXT: slti a0, a6, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a5
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a4, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i64 64, %y
%b = lshr i64 %x, %y
%c = shl i64 %x, %z
@@ -314,6 +450,22 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rolw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 0, %y
%and = and i32 %z, 31
%b = shl i32 %x, %y
@@ -348,6 +500,22 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rolw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = and i32 %y, 63
%b = shl i32 %x, %a
%c = sub i32 0, %y
@@ -385,6 +553,22 @@ define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rolw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = or i32 %y, 64
%b = shl i32 %x, %a
%c = sub i32 0, %y
@@ -420,6 +604,22 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rorw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 0, %y
%and = and i32 %z, 31
%b = lshr i32 %x, %y
@@ -454,6 +654,22 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rorw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = and i32 %y, 63
%b = lshr i32 %x, %a
%c = sub i32 0, %y
@@ -491,6 +707,22 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rorw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = or i32 %y, 64
%b = lshr i32 %x, %a
%c = sub i32 0, %y
@@ -593,6 +825,54 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rol a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a5, a2, -32
+; RV32XTHEADBB-NEXT: sll a4, a0, a2
+; RV32XTHEADBB-NEXT: bltz a5, .LBB10_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB10_3
+; RV32XTHEADBB-NEXT: .LBB10_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB10_3:
+; RV32XTHEADBB-NEXT: slti a5, a5, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: srl a2, a1, a6
+; RV32XTHEADBB-NEXT: andi a5, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, a5, -32
+; RV32XTHEADBB-NEXT: slti t0, a7, 0
+; RV32XTHEADBB-NEXT: neg t0, t0
+; RV32XTHEADBB-NEXT: and a2, t0, a2
+; RV32XTHEADBB-NEXT: bltz a7, .LBB10_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: j .LBB10_6
+; RV32XTHEADBB-NEXT: .LBB10_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a6
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a5
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB10_6:
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i64 0, %y
%and = and i64 %z, 63
%b = shl i64 %x, %y
@@ -696,6 +976,55 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rol a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 127
+; RV32XTHEADBB-NEXT: addi a4, a3, -32
+; RV32XTHEADBB-NEXT: bltz a4, .LBB11_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: sll a3, a0, a3
+; RV32XTHEADBB-NEXT: j .LBB11_3
+; RV32XTHEADBB-NEXT: .LBB11_2:
+; RV32XTHEADBB-NEXT: sll a5, a1, a2
+; RV32XTHEADBB-NEXT: srli a6, a0, 1
+; RV32XTHEADBB-NEXT: not a3, a3
+; RV32XTHEADBB-NEXT: srl a3, a6, a3
+; RV32XTHEADBB-NEXT: or a3, a5, a3
+; RV32XTHEADBB-NEXT: .LBB11_3:
+; RV32XTHEADBB-NEXT: sll a5, a0, a2
+; RV32XTHEADBB-NEXT: slti a4, a4, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a4, a4, a5
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: srl a2, a1, a6
+; RV32XTHEADBB-NEXT: andi a5, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, a5, -32
+; RV32XTHEADBB-NEXT: slti t0, a7, 0
+; RV32XTHEADBB-NEXT: neg t0, t0
+; RV32XTHEADBB-NEXT: and a2, t0, a2
+; RV32XTHEADBB-NEXT: bltz a7, .LBB11_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: j .LBB11_6
+; RV32XTHEADBB-NEXT: .LBB11_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a6
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a5
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB11_6:
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = and i64 %y, 127
%b = shl i64 %x, %a
%c = sub i64 0, %y
@@ -736,6 +1065,22 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rol a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a0, a2
+; RV32XTHEADBB-NEXT: srl a0, a1, a0
+; RV32XTHEADBB-NEXT: mv a1, a3
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = or i64 %y, 128
%b = shl i64 %x, %a
%c = sub i64 0, %y
@@ -838,6 +1183,54 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ror a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB13_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB13_3
+; RV32XTHEADBB-NEXT: .LBB13_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB13_3:
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: andi t0, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, t0, -32
+; RV32XTHEADBB-NEXT: and a2, a5, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB13_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: j .LBB13_6
+; RV32XTHEADBB-NEXT: .LBB13_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a6
+; RV32XTHEADBB-NEXT: not a4, t0
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: srl a4, a5, a4
+; RV32XTHEADBB-NEXT: or a1, a1, a4
+; RV32XTHEADBB-NEXT: .LBB13_6:
+; RV32XTHEADBB-NEXT: sll a0, a0, a6
+; RV32XTHEADBB-NEXT: slti a4, a7, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a0, a4, a0
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i64 0, %y
%and = and i64 %z, 63
%b = lshr i64 %x, %y
@@ -941,6 +1334,55 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ror a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: andi a3, a2, 127
+; RV32XTHEADBB-NEXT: addi a6, a3, -32
+; RV32XTHEADBB-NEXT: slti a5, a6, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a6, .LBB14_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: srl a3, a1, a3
+; RV32XTHEADBB-NEXT: j .LBB14_3
+; RV32XTHEADBB-NEXT: .LBB14_2:
+; RV32XTHEADBB-NEXT: srl a6, a0, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: not a3, a3
+; RV32XTHEADBB-NEXT: sll a3, a7, a3
+; RV32XTHEADBB-NEXT: or a3, a6, a3
+; RV32XTHEADBB-NEXT: .LBB14_3:
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: andi t0, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, t0, -32
+; RV32XTHEADBB-NEXT: and a2, a5, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB14_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: j .LBB14_6
+; RV32XTHEADBB-NEXT: .LBB14_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a6
+; RV32XTHEADBB-NEXT: not a4, t0
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: srl a4, a5, a4
+; RV32XTHEADBB-NEXT: or a1, a1, a4
+; RV32XTHEADBB-NEXT: .LBB14_6:
+; RV32XTHEADBB-NEXT: sll a0, a0, a6
+; RV32XTHEADBB-NEXT: slti a4, a7, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a0, a4, a0
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = and i64 %y, 127
%b = lshr i64 %x, %a
%c = sub i64 0, %y
@@ -981,6 +1423,22 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ror a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a3, a1, a2
+; RV32XTHEADBB-NEXT: neg a1, a2
+; RV32XTHEADBB-NEXT: sll a1, a0, a1
+; RV32XTHEADBB-NEXT: mv a0, a3
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%a = or i64 %y, 128
%b = lshr i64 %x, %a
%c = sub i64 0, %y
@@ -1026,6 +1484,27 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64ZBB-NEXT: sllw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 31
+; RV32XTHEADBB-NEXT: sll a4, a0, a3
+; RV32XTHEADBB-NEXT: neg a3, a3
+; RV32XTHEADBB-NEXT: srl a0, a0, a3
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srlw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a1, a1, a2
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i32 %amt, 31
%1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
%2 = shl i32 %b, %maskedamt
@@ -1141,6 +1620,62 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64ZBB-NEXT: sll a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a5, a4, 26
+; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: mv a7, a0
+; RV32XTHEADBB-NEXT: bnez a5, .LBB17_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a7, a1
+; RV32XTHEADBB-NEXT: .LBB17_2:
+; RV32XTHEADBB-NEXT: andi a6, a4, 63
+; RV32XTHEADBB-NEXT: sll t0, a7, a4
+; RV32XTHEADBB-NEXT: bnez a5, .LBB17_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB17_4:
+; RV32XTHEADBB-NEXT: srli a0, a1, 1
+; RV32XTHEADBB-NEXT: not t1, a4
+; RV32XTHEADBB-NEXT: srl a0, a0, t1
+; RV32XTHEADBB-NEXT: or a5, t0, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: srli a0, a7, 1
+; RV32XTHEADBB-NEXT: srl a7, a0, t1
+; RV32XTHEADBB-NEXT: addi a0, a6, -32
+; RV32XTHEADBB-NEXT: or a1, a1, a7
+; RV32XTHEADBB-NEXT: bltz a0, .LBB17_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: sll a3, a2, a6
+; RV32XTHEADBB-NEXT: j .LBB17_7
+; RV32XTHEADBB-NEXT: .LBB17_6:
+; RV32XTHEADBB-NEXT: sll a3, a3, a4
+; RV32XTHEADBB-NEXT: srli a7, a2, 1
+; RV32XTHEADBB-NEXT: not a6, a6
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB17_7:
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: slti a0, a0, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a1, a0
+; RV32XTHEADBB-NEXT: sltu a1, a0, a1
+; RV32XTHEADBB-NEXT: add a3, a5, a3
+; RV32XTHEADBB-NEXT: add a1, a3, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a3, a2, 63
+; RV64XTHEADBB-NEXT: sll a4, a0, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
+; RV64XTHEADBB-NEXT: srl a0, a0, a3
+; RV64XTHEADBB-NEXT: or a0, a4, a0
+; RV64XTHEADBB-NEXT: sll a1, a1, a2
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i64 %amt, 63
%1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
%2 = shl i64 %b, %maskedamt
@@ -1183,6 +1718,27 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64ZBB-NEXT: sllw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 31
+; RV32XTHEADBB-NEXT: srl a4, a0, a3
+; RV32XTHEADBB-NEXT: neg a3, a3
+; RV32XTHEADBB-NEXT: sll a0, a0, a3
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sllw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a1, a1, a2
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i32 %amt, 31
%1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
%2 = shl i32 %b, %maskedamt
@@ -1296,6 +1852,61 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64ZBB-NEXT: sll a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a7, a4, 32
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: beqz a7, .LBB19_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: .LBB19_2:
+; RV32XTHEADBB-NEXT: andi a5, a4, 63
+; RV32XTHEADBB-NEXT: srl t0, a6, a4
+; RV32XTHEADBB-NEXT: beqz a7, .LBB19_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a0, a1
+; RV32XTHEADBB-NEXT: .LBB19_4:
+; RV32XTHEADBB-NEXT: slli a1, a0, 1
+; RV32XTHEADBB-NEXT: not a7, a4
+; RV32XTHEADBB-NEXT: sll a1, a1, a7
+; RV32XTHEADBB-NEXT: or a1, a1, t0
+; RV32XTHEADBB-NEXT: srl t0, a0, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll a6, a6, a7
+; RV32XTHEADBB-NEXT: addi a0, a5, -32
+; RV32XTHEADBB-NEXT: or a6, a6, t0
+; RV32XTHEADBB-NEXT: bltz a0, .LBB19_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: sll a3, a2, a5
+; RV32XTHEADBB-NEXT: j .LBB19_7
+; RV32XTHEADBB-NEXT: .LBB19_6:
+; RV32XTHEADBB-NEXT: sll a3, a3, a4
+; RV32XTHEADBB-NEXT: srli a7, a2, 1
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: srl a5, a7, a5
+; RV32XTHEADBB-NEXT: or a3, a3, a5
+; RV32XTHEADBB-NEXT: .LBB19_7:
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: slti a0, a0, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a6, a0
+; RV32XTHEADBB-NEXT: sltu a2, a0, a6
+; RV32XTHEADBB-NEXT: add a1, a1, a3
+; RV32XTHEADBB-NEXT: add a1, a1, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a3, a2, 63
+; RV64XTHEADBB-NEXT: srl a4, a0, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
+; RV64XTHEADBB-NEXT: sll a0, a0, a3
+; RV64XTHEADBB-NEXT: or a0, a4, a0
+; RV64XTHEADBB-NEXT: sll a1, a1, a2
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i64 %amt, 63
%1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
%2 = shl i64 %b, %maskedamt
@@ -1342,6 +1953,32 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64ZBB-NEXT: rolw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a2, a2, 31
+; RV32XTHEADBB-NEXT: sll a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: srl a0, a0, a4
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: sll a2, a1, a2
+; RV32XTHEADBB-NEXT: srl a1, a1, a4
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 31
+; RV64XTHEADBB-NEXT: sllw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srlw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a2, a1, a2
+; RV64XTHEADBB-NEXT: srlw a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i32 %amt, 31
%1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
%2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1458,6 +2095,64 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64ZBB-NEXT: rol a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a5, a4, 26
+; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: .LBB21_2:
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a0, a1
+; RV32XTHEADBB-NEXT: .LBB21_4:
+; RV32XTHEADBB-NEXT: sll a7, a6, a4
+; RV32XTHEADBB-NEXT: srli t0, a0, 1
+; RV32XTHEADBB-NEXT: not a1, a4
+; RV32XTHEADBB-NEXT: srl t0, t0, a1
+; RV32XTHEADBB-NEXT: sll t1, a0, a4
+; RV32XTHEADBB-NEXT: srli a0, a6, 1
+; RV32XTHEADBB-NEXT: srl t2, a0, a1
+; RV32XTHEADBB-NEXT: mv a0, a3
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: .LBB21_6:
+; RV32XTHEADBB-NEXT: or a6, a7, t0
+; RV32XTHEADBB-NEXT: or a7, t1, t2
+; RV32XTHEADBB-NEXT: sll t0, a0, a4
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_8
+; RV32XTHEADBB-NEXT: # %bb.7:
+; RV32XTHEADBB-NEXT: mv a2, a3
+; RV32XTHEADBB-NEXT: .LBB21_8:
+; RV32XTHEADBB-NEXT: srli a3, a2, 1
+; RV32XTHEADBB-NEXT: srl a3, a3, a1
+; RV32XTHEADBB-NEXT: or a3, t0, a3
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: add a1, a7, a0
+; RV32XTHEADBB-NEXT: add a0, a6, a3
+; RV32XTHEADBB-NEXT: sltu a2, a0, a6
+; RV32XTHEADBB-NEXT: add a1, a1, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 63
+; RV64XTHEADBB-NEXT: sll a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srl a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sll a2, a1, a2
+; RV64XTHEADBB-NEXT: srl a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i64 %amt, 63
%1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
%2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1503,6 +2198,32 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64ZBB-NEXT: rorw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a2, a2, 31
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: sll a0, a0, a4
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: srl a2, a1, a2
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 31
+; RV64XTHEADBB-NEXT: srlw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sllw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: srlw a2, a1, a2
+; RV64XTHEADBB-NEXT: sllw a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i32 %amt, 31
%1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
%2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1617,6 +2338,63 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64ZBB-NEXT: ror a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a5, a4, 32
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: .LBB23_2:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB23_4:
+; RV32XTHEADBB-NEXT: srl a7, a6, a4
+; RV32XTHEADBB-NEXT: slli t0, a1, 1
+; RV32XTHEADBB-NEXT: not a0, a4
+; RV32XTHEADBB-NEXT: sll t0, t0, a0
+; RV32XTHEADBB-NEXT: srl t1, a1, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll t2, a6, a0
+; RV32XTHEADBB-NEXT: mv a6, a2
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: mv a6, a3
+; RV32XTHEADBB-NEXT: .LBB23_6:
+; RV32XTHEADBB-NEXT: or a1, t0, a7
+; RV32XTHEADBB-NEXT: or a7, t2, t1
+; RV32XTHEADBB-NEXT: srl t0, a6, a4
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_8
+; RV32XTHEADBB-NEXT: # %bb.7:
+; RV32XTHEADBB-NEXT: mv a3, a2
+; RV32XTHEADBB-NEXT: .LBB23_8:
+; RV32XTHEADBB-NEXT: slli a2, a3, 1
+; RV32XTHEADBB-NEXT: sll a2, a2, a0
+; RV32XTHEADBB-NEXT: or a2, a2, t0
+; RV32XTHEADBB-NEXT: srl a3, a3, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll a0, a6, a0
+; RV32XTHEADBB-NEXT: or a0, a0, a3
+; RV32XTHEADBB-NEXT: add a7, a7, a0
+; RV32XTHEADBB-NEXT: add a0, a1, a2
+; RV32XTHEADBB-NEXT: sltu a1, a0, a1
+; RV32XTHEADBB-NEXT: add a1, a7, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 63
+; RV64XTHEADBB-NEXT: srl a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sll a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: srl a2, a1, a2
+; RV64XTHEADBB-NEXT: sll a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
%maskedamt = and i64 %amt, 63
%1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
%2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1721,6 +2499,56 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rol a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_zext:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: sll a5, a0, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a6, a3, 0
+; RV32XTHEADBB-NEXT: neg a6, a6
+; RV32XTHEADBB-NEXT: bltz a3, .LBB24_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a5
+; RV32XTHEADBB-NEXT: j .LBB24_3
+; RV32XTHEADBB-NEXT: .LBB24_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a7, a2
+; RV32XTHEADBB-NEXT: srli t0, a0, 1
+; RV32XTHEADBB-NEXT: srl a7, t0, a7
+; RV32XTHEADBB-NEXT: or a3, a3, a7
+; RV32XTHEADBB-NEXT: .LBB24_3:
+; RV32XTHEADBB-NEXT: and a5, a6, a5
+; RV32XTHEADBB-NEXT: li a6, 32
+; RV32XTHEADBB-NEXT: sub a7, a6, a2
+; RV32XTHEADBB-NEXT: srl a6, a1, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB24_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a0, a6
+; RV32XTHEADBB-NEXT: j .LBB24_6
+; RV32XTHEADBB-NEXT: .LBB24_5:
+; RV32XTHEADBB-NEXT: li t0, 64
+; RV32XTHEADBB-NEXT: sub a2, t0, a2
+; RV32XTHEADBB-NEXT: srl a0, a0, a4
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB24_6:
+; RV32XTHEADBB-NEXT: slti a1, a7, 0
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: and a1, a1, a6
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a5, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_zext:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 64, %y
%zext = zext i32 %z to i64
%zexty = zext i32 %y to i64
@@ -1827,6 +2655,56 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ror a0, a0, a1
; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_zext:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: srl a5, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a6, a3, 0
+; RV32XTHEADBB-NEXT: neg a6, a6
+; RV32XTHEADBB-NEXT: bltz a3, .LBB25_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a5
+; RV32XTHEADBB-NEXT: j .LBB25_3
+; RV32XTHEADBB-NEXT: .LBB25_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a7, a2
+; RV32XTHEADBB-NEXT: slli t0, a1, 1
+; RV32XTHEADBB-NEXT: sll a7, t0, a7
+; RV32XTHEADBB-NEXT: or a3, a3, a7
+; RV32XTHEADBB-NEXT: .LBB25_3:
+; RV32XTHEADBB-NEXT: and a5, a6, a5
+; RV32XTHEADBB-NEXT: li a6, 32
+; RV32XTHEADBB-NEXT: sub a7, a6, a2
+; RV32XTHEADBB-NEXT: sll a6, a0, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB25_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a1, a6
+; RV32XTHEADBB-NEXT: j .LBB25_6
+; RV32XTHEADBB-NEXT: .LBB25_5:
+; RV32XTHEADBB-NEXT: li t0, 64
+; RV32XTHEADBB-NEXT: sub a2, t0, a2
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a2
+; RV32XTHEADBB-NEXT: or a1, a1, a0
+; RV32XTHEADBB-NEXT: .LBB25_6:
+; RV32XTHEADBB-NEXT: slti a0, a7, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a6
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a5, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_zext:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
%z = sub i32 64, %y
%zext = zext i32 %z to i64
%zexty = zext i32 %y to i64
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
new file mode 100644
index 0000000000000..2e3156d8e7c3b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -0,0 +1,453 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @ctlz_i32(i32 %a) nounwind {
+; RV32I-LABEL: ctlz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB0_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV32I-LABEL: ctlz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s4, a2, 1365
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s5, a1, 819
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s6, a1, -241
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s3, a1, 257
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: srli a0, s2, 1
+; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: bnez s0, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: j .LBB1_3
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, s1, 24
+; RV32I-NEXT: .LBB1_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a1, .LBB1_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, 32
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB1_2:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a1
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @cttz_i32(i32 %a) nounwind {
+; RV32I-LABEL: cttz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 30667
+; RV32I-NEXT: addi a1, a1, 1329
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli a0, a0, 27
+; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
+; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: cttz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB2_2:
+; RV32XTHEADBB-NEXT: li a0, 32
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV32I-LABEL: cttz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: lui a1, 30667
+; RV32I-NEXT: addi s3, a1, 1329
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s2
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: bnez s2, .LBB3_3
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: beqz s0, .LBB3_4
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: srli s1, s1, 27
+; RV32I-NEXT: add s1, s4, s1
+; RV32I-NEXT: lbu a0, 0(s1)
+; RV32I-NEXT: j .LBB3_5
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: srli a0, a0, 27
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: bnez s0, .LBB3_2
+; RV32I-NEXT: .LBB3_4:
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: .LBB3_5:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: cttz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: addi a0, a1, -1
+; RV32XTHEADBB-NEXT: not a1, a1
+; RV32XTHEADBB-NEXT: and a0, a1, a0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 64
+; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+define i32 @sextb_i32(i32 %a) nounwind {
+; RV32I-LABEL: sextb_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai a0, a0, 24
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sextb_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 24
+ %shr = ashr exact i32 %shl, 24
+ ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV32I-LABEL: sextb_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 24
+; RV32I-NEXT: srai a0, a1, 24
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sextb_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT: srai a1, a0, 31
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 56
+ %shr = ashr exact i64 %shl, 56
+ ret i64 %shr
+}
+
+define i32 @sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: sexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexth_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 16
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: sexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexth_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: srai a1, a0, 31
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 48
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: zexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: zexth_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: ret
+ %and = and i32 %a, 65535
+ ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: zexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: zexth_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %and = and i64 %a, 65535
+ ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define i32 @bswap_i32(i32 %a) nounwind {
+; RV32I-LABEL: bswap_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: bswap_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.rev a0, a0
+; RV32XTHEADBB-NEXT: ret
+ %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %1
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV32I-LABEL: bswap_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: lui a3, 16
+; RV32I-NEXT: addi a3, a3, -256
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a2, a1, a2
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: bswap_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.rev a2, a1
+; RV32XTHEADBB-NEXT: th.rev a1, a0
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
new file mode 100644
index 0000000000000..6b032d39d9f83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -0,0 +1,768 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 61681
+; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB0_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 61681
+; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: j .LBB1_3
+; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: .LBB1_3: # %cond.end
+; RV64I-NEXT: li a1, 31
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: log2_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 31
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ %2 = sub i32 31, %1
+ ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: li s0, 32
+; RV64I-NEXT: li a1, 32
+; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 61681
+; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: .LBB2_2: # %cond.end
+; RV64I-NEXT: sub a0, s0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: log2_ceil_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addiw a0, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 32
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = sub i32 %a, 1
+ %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %3 = sub i32 32, %2
+ ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: srliw a0, a0, 1
+; RV64I-NEXT: or a0, s0, a0
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 61681
+; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: xori a0, a0, 31
+; RV64I-NEXT: snez a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: findLastSet_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: not a1, a0
+; RV64XTHEADBB-NEXT: slli a1, a1, 32
+; RV64XTHEADBB-NEXT: th.ff0 a1, a1
+; RV64XTHEADBB-NEXT: xori a1, a1, 31
+; RV64XTHEADBB-NEXT: snez a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: or a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+ %2 = xor i32 31, %1
+ %3 = icmp eq i32 %a, 0
+ %4 = select i1 %3, i32 -1, i32 %2
+ ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a0, a0, 1
+; RV64I-NEXT: beqz a0, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 61681
+; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srliw a0, a0, 1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 32
+; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = lshr i32 %a, 1
+ %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: lui a1, %hi(.LCPI5_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1)
+; RV64I-NEXT: lui a2, %hi(.LCPI5_1)
+; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2)
+; RV64I-NEXT: srli a3, a0, 1
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: and a1, a0, a2
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: lui a2, %hi(.LCPI5_2)
+; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: lui a1, %hi(.LCPI5_3)
+; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1)
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB5_2:
+; RV64I-NEXT: li a0, 64
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB6_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: cttz_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: beqz a0, .LBB6_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB6_2:
+; RV64XTHEADBB-NEXT: li a0, 32
+; RV64XTHEADBB-NEXT: ret
+; RV64ZBB-LABEL: cttz_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: snez a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: findFirstSet_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a2, a0
+; RV64XTHEADBB-NEXT: and a1, a2, a1
+; RV64XTHEADBB-NEXT: th.ff1 a1, a1
+; RV64XTHEADBB-NEXT: li a2, 64
+; RV64XTHEADBB-NEXT: sub a2, a2, a1
+; RV64XTHEADBB-NEXT: snez a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: or a0, a0, a2
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ %2 = icmp eq i32 %a, 0
+ %3 = select i1 %2, i32 -1, i32 %1
+ ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: seqz a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: ffs_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a2, a0
+; RV64XTHEADBB-NEXT: and a1, a2, a1
+; RV64XTHEADBB-NEXT: th.ff1 a1, a1
+; RV64XTHEADBB-NEXT: li a2, 65
+; RV64XTHEADBB-NEXT: sub a2, a2, a1
+; RV64XTHEADBB-NEXT: seqz a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: and a0, a0, a2
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ %2 = add i32 %1, 1
+ %3 = icmp eq i32 %a, 0
+ %4 = select i1 %3, i32 0, i32 %2
+ ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 58
+; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB10_2:
+; RV64I-NEXT: li a0, 64
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: cttz_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: beqz a0, .LBB10_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB10_2:
+; RV64XTHEADBB-NEXT: li a0, 64
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sextb_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 24
+ %shr = ashr exact i32 %shl, 24
+ ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sextb_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 56
+ %shr = ashr exact i64 %shl, 56
+ ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexth_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 16
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexth_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 48
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: zexth_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: ret
+ %and = and i32 %a, 65535
+ ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: zexth_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT: ret
+ %and = and i64 %a, 65535
+ ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: slliw a0, a0, 24
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.revw a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a2, a0, 8
+; RV64I-NEXT: lui a3, 16
+; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: or a2, a2, a4
+; RV64I-NEXT: and a3, a0, a3
+; RV64I-NEXT: slli a3, a3, 8
+; RV64I-NEXT: slli a0, a0, 24
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: sw a0, 0(a1)
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32_nosext:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.revw a0, a0
+; RV64XTHEADBB-NEXT: sw a0, 0(a1)
+; RV64XTHEADBB-NEXT: ret
+ %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ store i32 %1, ptr %x
+ ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: lui a4, 4080
+; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srli a5, a0, 8
+; RV64I-NEXT: srliw a5, a5, 24
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: and a4, a0, a4
+; RV64I-NEXT: slli a4, a4, 24
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: bswap_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.rev a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %1
+}
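
A minimal standalone sketch of the new lowering, assuming an llc build that
includes this patch; the file and function names below (ctlz.ll,
ctlz_example) are placeholders and are not taken from the test suite:

; ctlz.ll
declare i32 @llvm.ctlz.i32(i32, i1)

define i32 @ctlz_example(i32 %x) nounwind {
  ; With -mattr=+xtheadbb this is expected to select a single
  ; "th.ff1 a0, a0" (see the RV32XTHEADBB check lines for ctlz_i32 above);
  ; without the attribute it falls back to the generic RV32I expansion.
  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %r
}

Mirroring the RUN lines of rv32xtheadbb.ll:

  llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs ctlz.ll -o -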