[llvm] 8912200 - [RISCV] Add experimental support for making i32 a legal type on RV64 in SelectionDAG. (#70357)

via llvm-commits <llvm-commits at lists.llvm.org>
Wed Nov 1 09:36:47 PDT 2023


Author: Craig Topper
Date: 2023-11-01T09:36:41-07:00
New Revision: 8912200966409f18e27aa0627e521faa190029a6

URL: https://github.com/llvm/llvm-project/commit/8912200966409f18e27aa0627e521faa190029a6
DIFF: https://github.com/llvm/llvm-project/commit/8912200966409f18e27aa0627e521faa190029a6.diff

LOG: [RISCV] Add experimental support for making i32 a legal type on RV64 in SelectionDAG. (#70357)

This will select i32 operations directly to W instructions without
custom nodes. Hopefully this can allow us to be less dependent on
hasAllNBitUsers to recover i32 operations in RISCVISelDAGToDAG.cpp.
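
As a rough illustration (a hypothetical example, not taken from the added
tests): with i32 legal, the i32 add below can be matched directly by the new
ADDW pattern in RISCVInstrInfo.td rather than being promoted to i64 first.

    define i32 @add_i32(i32 %a, i32 %b) {
      %c = add i32 %a, %b
      ret i32 %c
    }

    ; Expected selection (illustrative):
    ;   addw a0, a0, a1
    ;   ret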

This support is enabled with a command line option that is off by
default.
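
A lit RUN line along these lines exercises the new path (the exact RUN lines
used in the added tests may differ):

    ; RUN: llc -mtriple=riscv64 -riscv-experimental-rv64-legal-i32 \
    ; RUN:   -verify-machineinstrs < %s | FileCheck %s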

Generated code is still not optimal.

I've duplicated many test cases for this, but it's not complete. Enabling this runs all existing lit tests without crashing.

Added: 
    llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
    llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/Target/RISCV/RISCVGISel.td
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVInstrInfoA.td
    llvm/lib/Target/RISCV/RISCVInstrInfoD.td
    llvm/lib/Target/RISCV/RISCVInstrInfoF.td
    llvm/lib/Target/RISCV/RISCVInstrInfoM.td
    llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
    llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
    llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
    llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f19beea3a3ed8b7..82751a442dbc3bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5023,6 +5023,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::SREM:
   case ISD::UDIV:
   case ISD::UREM:
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR: {
@@ -5039,12 +5043,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
         break;
       case ISD::SDIV:
       case ISD::SREM:
+      case ISD::SMIN:
+      case ISD::SMAX:
         ExtOp = ISD::SIGN_EXTEND;
         break;
       case ISD::UDIV:
       case ISD::UREM:
         ExtOp = ISD::ZERO_EXTEND;
         break;
+      case ISD::UMIN:
+      case ISD::UMAX:
+        if (TLI.isSExtCheaperThanZExt(OVT, NVT))
+          ExtOp = ISD::SIGN_EXTEND;
+        else
+          ExtOp = ISD::ZERO_EXTEND;
+        break;
       }
       TruncOp = ISD::TRUNCATE;
     }
@@ -5166,7 +5179,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     unsigned ExtOp = ISD::FP_EXTEND;
     if (NVT.isInteger()) {
       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
-      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      if (isSignedIntSetCC(CCCode) ||
+          TLI.isSExtCheaperThanZExt(Node->getOperand(0).getValueType(), NVT))
+        ExtOp = ISD::SIGN_EXTEND;
+      else
+        ExtOp = ISD::ZERO_EXTEND;
     }
     if (Node->isStrictFPOpcode()) {
       SDValue InChain = Node->getOperand(0);

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5bd04e2360679d4..2d2585b3db73226 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -371,7 +371,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
         N->getMemOperand());
     ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
     ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
-    return Res.getValue(1);
+    return DAG.getSExtOrTrunc(Res.getValue(1), SDLoc(N), NVT);
   }
 
   // Op2 is used for the comparison and thus must be extended according to the

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 229f220d8460bda..29505f7505ba25c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3468,7 +3468,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
     }
 
     if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
-        (TLI.isOperationLegalOrCustom(Opc, VT) ||
+        (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
          (UseScalarMinMax &&
           TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
         // If the underlying comparison instruction is used by any other

diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index fcac2f365596260..458bf9a2efde4d6 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -21,8 +21,6 @@ def simm12Plus1 : ImmLeaf<XLenVT, [{
 def simm12Plus1i32 : ImmLeaf<i32, [{
     return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
 
-def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
-
 def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
 
 // FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate
@@ -49,11 +47,6 @@ def GIAddrRegImm :
   GIComplexOperandMatcher<s32, "selectAddrRegImm">,
   GIComplexPatternEquiv<AddrRegImm>;
 
-// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
-// checking happy so we can use ADDIW which expects an XLen immediate.
-def as_i64imm : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
-}]>;
 def gi_as_i64imm : GICustomOperandRenderer<"renderImm">,
   GISDNodeXFormEquiv<as_i64imm>;
 
@@ -88,14 +81,10 @@ def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
           (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
 
 let Predicates = [IsRV64] in {
-def : Pat<(i32 (add GPR:$rs1, GPR:$rs2)), (ADDW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (sub GPR:$rs1, GPR:$rs2)), (SUBW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>;
 
-def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
-          (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
 def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
           (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
 
@@ -116,19 +105,6 @@ def : Pat<(i32 (sra GPR:$rs1, uimm5i32:$imm)),
           (SRAIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
 def : Pat<(i32 (srl GPR:$rs1, uimm5i32:$imm)),
           (SRLIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
-
-def : Pat<(i64 (sext i32:$rs)), (ADDIW GPR:$rs, 0)>;
-}
-
-let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
-def : Pat<(i32 (mul GPR:$rs1, GPR:$rs2)), (MULW GPR:$rs1, GPR:$rs2)>;
-}
-
-let Predicates = [HasStdExtM, IsRV64] in {
-def : Pat<(i32 (sdiv GPR:$rs1, GPR:$rs2)), (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (srem GPR:$rs1, GPR:$rs2)), (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (udiv GPR:$rs1, GPR:$rs2)), (DIVUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (urem GPR:$rs1, GPR:$rs2)), (REMUW GPR:$rs1, GPR:$rs2)>;
 }
 
 let Predicates = [HasStdExtZba, IsRV64] in {
@@ -136,13 +112,8 @@ let Predicates = [HasStdExtZba, IsRV64] in {
 // in SDISel for RV64, which is not the case in GISel.
 def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt),
           (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
-
-def : Pat<(i64 (zext i32:$rs)), (ADD_UW GPR:$rs, (XLenVT X0))>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
-let Predicates = [IsRV64, NotHasStdExtZba] in
-def: Pat<(i64 (zext i32:$rs)), (SRLI (SLLI GPR:$rs, 32), 32)>;
-
 // Ptr type used in patterns with GlobalISelEmitter
 def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
 
@@ -196,8 +167,6 @@ def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty GPR:$rs2))),
           (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
 }
 
-// Define pattern expansions for load/extload and store/truncstore operations
-// for ptr return type
 let Predicates = [IsRV32] in {
 def : LdPat<load, LW, PtrVT>;
 def : StPat<store, SW, GPR, PtrVT>;
@@ -206,18 +175,4 @@ def : StPat<store, SW, GPR, PtrVT>;
 let Predicates = [IsRV64] in {
 def : LdPat<load, LD, PtrVT>;
 def : StPat<store, SD, GPR, PtrVT>;
-
-// Define pattern expansions for rv64 load/extloads and store/truncstore
-// operations for i32 return type
-def : LdPat<sextloadi8, LB, i32>;
-def : LdPat<extloadi8, LBU, i32>;
-def : LdPat<zextloadi8, LBU, i32>;
-def : LdPat<sextloadi16, LH, i32>;
-def : LdPat<extloadi16, LH, i32>;
-def : LdPat<zextloadi16, LHU, i32>;
-def : LdPat<load, LW, i32>;
-
-def : StPat<truncstorei8, SB, GPR, i32>;
-def : StPat<truncstorei16, SH, GPR, i32>;
-def : StPat<store, SW, GPR, i32>;
-} // Predicates = [IsRV64]
+}

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9f3c387914944b7..51a235bf2ca1861 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -67,8 +67,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
       SDLoc DL(N);
       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
-      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
-                               N->getOperand(0), VL);
+      SDValue Src = N->getOperand(0);
+      if (VT.isInteger())
+        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
+                              N->getOperand(0));
+      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
       break;
     }
     case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
@@ -833,7 +836,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
   switch (Opcode) {
   case ISD::Constant: {
-    assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
+    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
     auto *ConstNode = cast<ConstantSDNode>(Node);
     if (ConstNode->isZero()) {
       SDValue New =
@@ -3299,6 +3302,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
   case RISCV::TH_MULAH:
   case RISCV::TH_MULSW:
   case RISCV::TH_MULSH:
+    if (N0.getValueType() == MVT::i32)
+      break;
+
     // Result is already sign extended just remove the sext.w.
     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
     ReplaceUses(N, N0.getNode());

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e9f80432ab190c7..67f09377bf45e48 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -75,6 +75,10 @@ static cl::opt<int>
                        "use for creating a floating-point immediate value"),
               cl::init(2));
 
+static cl::opt<bool>
+    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
+                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -115,6 +119,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   // Set up the register classes.
   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
+  if (Subtarget.is64Bit() && RV64LegalI32)
+    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
 
   if (Subtarget.hasStdExtZfhOrZfhmin())
     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
@@ -237,8 +243,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   setOperationAction(ISD::BR_CC, XLenVT, Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
 
   setCondCodeAction(ISD::SETLE, XLenVT, Expand);
   setCondCodeAction(ISD::SETGT, XLenVT, Custom);
@@ -247,6 +257,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
   setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
 
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SETCC, MVT::i32, Promote);
+
   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
 
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -262,14 +275,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
-    setOperationAction(ISD::LOAD, MVT::i32, Custom);
+    if (!RV64LegalI32)
+      setOperationAction(ISD::LOAD, MVT::i32, Custom);
 
-    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
-                       MVT::i32, Custom);
+    if (RV64LegalI32)
+      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::i32, Promote);
+    else
+      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+                         MVT::i32, Custom);
 
-    setOperationAction(ISD::SADDO, MVT::i32, Custom);
-    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
-                       MVT::i32, Custom);
+    if (!RV64LegalI32) {
+      setOperationAction(ISD::SADDO, MVT::i32, Custom);
+      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+                         MVT::i32, Custom);
+    }
   } else {
     setLibcallName(
         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -277,19 +296,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 
-  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
+  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
     setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
-  else if (Subtarget.is64Bit())
-    setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
-  else
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::MUL, MVT::i32, Promote);
+  } else if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::MUL, MVT::i128, Custom);
+    if (!RV64LegalI32)
+      setOperationAction(ISD::MUL, MVT::i32, Custom);
+  } else {
     setOperationAction(ISD::MUL, MVT::i64, Custom);
+  }
 
-  if (!Subtarget.hasStdExtM())
+  if (!Subtarget.hasStdExtM()) {
     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                        XLenVT, Expand);
-  else if (Subtarget.is64Bit())
-    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
-                       {MVT::i8, MVT::i16, MVT::i32}, Custom);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+                         Promote);
+  } else if (Subtarget.is64Bit()) {
+    if (!RV64LegalI32)
+      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+                         {MVT::i8, MVT::i16, MVT::i32}, Custom);
+  }
+
+  if (RV64LegalI32 && Subtarget.is64Bit()) {
+    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
+    setOperationAction(
+        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
+        Expand);
+  }
 
   setOperationAction(
       {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
@@ -299,7 +335,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                      Custom);
 
   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
-    if (Subtarget.is64Bit())
+    if (!RV64LegalI32 && Subtarget.is64Bit())
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
   } else if (Subtarget.hasVendorXTHeadBb()) {
     if (Subtarget.is64Bit())
@@ -307,6 +343,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
   } else {
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
   }
 
   // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
@@ -316,6 +354,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                       Subtarget.hasVendorXTHeadBb())
                          ? Legal
                          : Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::BSWAP, MVT::i32,
+                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+                        Subtarget.hasVendorXTHeadBb())
+                           ? Promote
+                           : Expand);
+
   // Zbkb can use rev8+brev8 to implement bitreverse.
   setOperationAction(ISD::BITREVERSE, XLenVT,
                      Subtarget.hasStdExtZbkb() ? Custom : Expand);
@@ -323,30 +368,49 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtZbb()) {
     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                        Legal);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
+                         Promote);
 
-    if (Subtarget.is64Bit())
-      setOperationAction(
-          {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF},
-          MVT::i32, Custom);
+    if (Subtarget.is64Bit()) {
+      if (RV64LegalI32)
+        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+      else
+        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
+    }
   } else {
     setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
   }
 
   if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
     // We need the custom lowering to make sure that the resulting sequence
     // for the 32bit case is efficient on 64bit targets.
-    if (Subtarget.is64Bit())
-      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+    if (Subtarget.is64Bit()) {
+      if (RV64LegalI32) {
+        setOperationAction(ISD::CTLZ, MVT::i32,
+                           Subtarget.hasStdExtZbb() ? Legal : Promote);
+        if (!Subtarget.hasStdExtZbb())
+          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
+      } else
+        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+    }
   } else {
     setOperationAction(ISD::CTLZ, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
   }
 
-  if (Subtarget.is64Bit())
+  if (!RV64LegalI32 && Subtarget.is64Bit())
     setOperationAction(ISD::ABS, MVT::i32, Custom);
 
   if (!Subtarget.hasVendorXTHeadCondMov())
     setOperationAction(ISD::SELECT, XLenVT, Custom);
 
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SELECT, MVT::i32, Promote);
+
   static const unsigned FPLegalNodeTypes[] = {
       ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
       ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
@@ -525,6 +589,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                         ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                        XLenVT, Legal);
 
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+                         MVT::i32, Legal);
+
     setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
@@ -569,6 +638,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setBooleanVectorContents(ZeroOrOneBooleanContent);
 
     setOperationAction(ISD::VSCALE, XLenVT, Custom);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::VSCALE, MVT::i32, Custom);
 
     // RVV intrinsics may have illegal operands.
     // We also need to custom legalize vmv.x.s.
@@ -1247,8 +1318,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget.hasStdExtA())
+  if (Subtarget.hasStdExtA()) {
     setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+  }
 
   if (Subtarget.hasForcedAtomics()) {
     // Force __sync libcalls to be emitted for atomic rmw/cas operations.
@@ -2090,7 +2164,12 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
       !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
     return MVT::f32;
 
-  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+  MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
+    return MVT::i64;
+
+  return PartVT;
 }
 
 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
@@ -2105,6 +2184,21 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
 
+unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+  unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
+    IntermediateVT = MVT::i64;
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
+    RegisterVT = MVT::i64;
+
+  return NumRegs;
+}
+
 // Changes the condition code and swaps operands if necessary, so the SetCC
 // operation matches one of the comparisons supported directly by branches
 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
@@ -3263,6 +3357,8 @@ static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
       auto OpCode =
         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+      if (!VT.isFloatingPoint())
+        LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
       Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                         LastOp, Mask, VL);
       Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
@@ -3390,6 +3486,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
+    if (!VT.isFloatingPoint())
+      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
     Splat =
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3623,10 +3721,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
       // For a splat, perform a scalar truncate before creating the wider
       // vector.
-      assert(Splat.getValueType() == XLenVT &&
-             "Unexpected type for i1 splat value");
-      Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
-                          DAG.getConstant(1, DL, XLenVT));
+      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
+                          DAG.getConstant(1, DL, Splat.getValueType()));
       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
     } else {
       SmallVector<SDValue, 8> Ops(Op->op_values());
@@ -3643,6 +3739,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       return Gather;
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
+    if (!VT.isFloatingPoint())
+      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
     Splat =
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3704,7 +3802,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   SDValue Vec = DAG.getUNDEF(ContainerVT);
   UndefCount = 0;
-  for (const SDValue &V : Op->ops()) {
+  for (SDValue V : Op->ops()) {
     if (V.isUndef()) {
       UndefCount++;
       continue;
@@ -3717,6 +3815,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     }
     auto OpCode =
       VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+    if (!VT.isFloatingPoint())
+      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
     Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                       V, Mask, VL);
   }
@@ -4277,6 +4377,8 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
   auto OpCode = IsVSlidedown ?
     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+  if (!VT.isFloatingPoint())
+    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
   auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT),
                          convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
@@ -5162,10 +5264,12 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
     return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
   }
 
-  SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
-  SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
-  return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
-                      ISD::CondCode::SETNE);
+  SDValue FPCLASS =
+      DAG.getNode(RISCVISD::FPCLASS, DL, XLenVT, Op.getOperand(0));
+  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FPCLASS, TDCMaskV);
+  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
+                             ISD::CondCode::SETNE);
+  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
 }
 
 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
@@ -5673,6 +5777,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (VT.isFixedLengthVector())
       ContainerVT = getContainerForFixedLengthVector(VT);
     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
     SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), Scalar, VL);
     if (VT.isFixedLengthVector())
@@ -5680,9 +5785,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return V;
   }
   case ISD::VSCALE: {
+    MVT XLenVT = Subtarget.getXLenVT();
     MVT VT = Op.getSimpleValueType();
     SDLoc DL(Op);
-    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+    SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
     // We define our scalable vector types for lmul=1 to use a 64 bit known
     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
     // vscale as VLENB / 8.
@@ -5695,22 +5801,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (isPowerOf2_64(Val)) {
       uint64_t Log2 = Log2_64(Val);
       if (Log2 < 3)
-        return DAG.getNode(ISD::SRL, DL, VT, VLENB,
-                           DAG.getConstant(3 - Log2, DL, VT));
-      if (Log2 > 3)
-        return DAG.getNode(ISD::SHL, DL, VT, VLENB,
-                           DAG.getConstant(Log2 - 3, DL, VT));
-      return VLENB;
-    }
-    // If the multiplier is a multiple of 8, scale it down to avoid needing
-    // to shift the VLENB value.
-    if ((Val % 8) == 0)
-      return DAG.getNode(ISD::MUL, DL, VT, VLENB,
-                         DAG.getConstant(Val / 8, DL, VT));
-
-    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
-                                 DAG.getConstant(3, DL, VT));
-    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+        Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+                          DAG.getConstant(3 - Log2, DL, VT));
+      else if (Log2 > 3)
+        Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
+                          DAG.getConstant(Log2 - 3, DL, XLenVT));
+    } else if ((Val % 8) == 0) {
+      // If the multiplier is a multiple of 8, scale it down to avoid needing
+      // to shift the VLENB value.
+      Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
+                        DAG.getConstant(Val / 8, DL, XLenVT));
+    } else {
+      SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+                                   DAG.getConstant(3, DL, XLenVT));
+      Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
+                        DAG.getConstant(Val, DL, XLenVT));
+    }
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
   }
   case ISD::FPOWI: {
     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
@@ -5958,7 +6065,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
     SDValue Res =
         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
-    if (Subtarget.is64Bit())
+    if (Subtarget.is64Bit() && !RV64LegalI32)
       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
     return DAG.getBitcast(MVT::i32, Res);
   }
@@ -5987,7 +6094,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
     SDValue Res =
         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
-    if (Subtarget.is64Bit())
+    if (Subtarget.is64Bit() && !RV64LegalI32)
       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
     return DAG.getBitcast(MVT::i32, Res);
   }
@@ -7155,12 +7262,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
   }
-  MVT XLenVT = Subtarget.getXLenVT();
-  assert(SplatVal.getValueType() == XLenVT &&
-         "Unexpected type for i1 splat value");
   MVT InterVT = VT.changeVectorElementType(MVT::i8);
-  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
-                         DAG.getConstant(1, DL, XLenVT));
+  SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
+                         DAG.getConstant(1, DL, SplatVal.getValueType()));
   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
   SDValue Zero = DAG.getConstant(0, DL, InterVT);
   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
@@ -7589,6 +7693,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     unsigned Opc =
         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
     if (isNullConstant(Idx)) {
+      if (!VecVT.isFloatingPoint())
+        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
 
       if (ContainerVT != OrigContainerVT)
@@ -7693,8 +7799,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
       auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
       SDValue Vfirst =
           DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
-      return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
-                          ISD::SETEQ);
+      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
+                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
     }
     if (VecVT.isFixedLengthVector()) {
       unsigned NumElts = VecVT.getVectorNumElements();
@@ -7732,8 +7839,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
         // Extract the bit from GPR.
         SDValue ShiftRight =
             DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
-        return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
-                           DAG.getConstant(1, DL, XLenVT));
+        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+                                  DAG.getConstant(1, DL, XLenVT));
+        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
       }
     }
     // Otherwise, promote to an i8 vector and extract from that.
@@ -8028,7 +8136,9 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
   SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
 
   SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+  SDValue Res =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
 }
 
 // LMUL * VLEN should be greater than or equal to EGS * SEW
@@ -8072,12 +8182,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
     }
 
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
   case Intrinsic::riscv_sm4ks:
   case Intrinsic::riscv_sm4ed: {
     unsigned Opc =
         IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      SDValue Res =
+          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                        Op.getOperand(3));
   }
@@ -8088,20 +8216,43 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
   case Intrinsic::riscv_clmul:
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
     return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::riscv_clmulh:
-    return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case Intrinsic::riscv_clmulr:
-    return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
-                       Op.getOperand(2));
+  case Intrinsic::riscv_clmulr: {
+    unsigned Opc =
+        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+                           DAG.getConstant(32, DL, MVT::i64));
+      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+                           DAG.getConstant(32, DL, MVT::i64));
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
+      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
+                        DAG.getConstant(32, DL, MVT::i64));
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
+    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
+  }
   case Intrinsic::experimental_get_vector_length:
     return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
-  case Intrinsic::riscv_vmv_x_s:
-    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
-    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
-                       Op.getOperand(1));
+  case Intrinsic::riscv_vmv_x_s: {
+    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
+    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
+  }
   case Intrinsic::riscv_vfmv_f_s:
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                        Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
@@ -8621,8 +8772,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
          "Unexpected reduction lowering");
 
   MVT XLenVT = Subtarget.getXLenVT();
-  assert(Op.getValueType() == XLenVT &&
-         "Expected reduction output to be legalized to XLenVT");
 
   MVT ContainerVT = VecVT;
   if (VecVT.isFixedLengthVector()) {
@@ -8676,6 +8825,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
   }
 
   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
 
   if (!IsVP)
     return SetCC;
@@ -8686,7 +8836,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
   // 0 for an inactive vector, and so we've already received the neutral value:
   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
   // can simply include the start value.
-  return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
+  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
 }
 
 static bool isNonZeroAVL(SDValue AVL) {
@@ -10581,6 +10731,8 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
 
+  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
+
   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                               DAG.getConstant(2, DL, XLenVT));
   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
@@ -16732,12 +16884,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
     break;
   case CCValAssign::BCvt:
     if (VA.getLocVT().isInteger() &&
-        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
-    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
-      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
-    else
+    } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      if (RV64LegalI32) {
+        Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
+        Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+      } else {
+        Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+      }
+    } else {
       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+    }
     break;
   }
   return Val;
@@ -16791,13 +16949,19 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
     break;
   case CCValAssign::BCvt:
-    if (VA.getLocVT().isInteger() &&
-        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
-      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
-    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
-      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
-    else
+    if (LocVT.isInteger() &&
+        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
+      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
+    } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
+      if (RV64LegalI32) {
+        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
+        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
+      } else {
+        Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+      }
+    } else {
       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+    }
     break;
   }
   return Val;

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 49dd01eccb02a0b..bb2ac3c2e012d58 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -487,6 +487,12 @@ class RISCVTargetLowering : public TargetLowering {
                                          CallingConv::ID CC,
                                          EVT VT) const override;
 
+  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+                                                CallingConv::ID CC, EVT VT,
+                                                EVT &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                                MVT &RegisterVT) const override;
+
   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                             EVT VT) const override;
 

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 1a9242cff0b445d..71ba4025b6a07e9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1183,11 +1183,13 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
 
 class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
     : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
-    : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
+                ValueType vt2 = XLenVT>
+    : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
 
-class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
-    : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
+class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
+                ValueType vt = XLenVT>
+    : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)),
           (Inst GPR:$rs1, ImmType:$imm)>;
 class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
     : PatGprImm<OpNode, Inst, simm12>;
@@ -1744,7 +1746,7 @@ def : LdPat<sextloadi8, LB>;
 def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
 def : LdPat<sextloadi16, LH>;
 def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
+def : LdPat<load, LW, i32>;
 def : LdPat<zextloadi8, LBU>;
 def : LdPat<zextloadi16, LHU>;
 
@@ -1758,7 +1760,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
 
 def : StPat<truncstorei8, SB, GPR, XLenVT>;
 def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
+def : StPat<store, SW, GPR, i32>;
 
 /// Fences
 
@@ -1992,6 +1994,51 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
                  (AddiPairImmSmall AddiPair:$rs2))>;
 }
 
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
+
+// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
+// checking happy so we can use ADDIW which expects an XLen immediate.
+def as_i64imm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+let Predicates = [IsRV64] in {
+def : LdPat<sextloadi8, LB, i32>;
+def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
+def : LdPat<sextloadi16, LH, i32>;
+def : LdPat<extloadi16, LH, i32>;
+def : LdPat<zextloadi8, LBU, i32>;
+def : LdPat<zextloadi16, LHU, i32>;
+
+def : StPat<truncstorei8, SB, GPR, i32>;
+def : StPat<truncstorei16, SH, GPR, i32>;
+
+def : Pat<(anyext GPR:$src), (COPY GPR:$src)>;
+def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>;
+def : Pat<(trunc GPR:$src), (COPY GPR:$src)>;
+
+def : PatGprGpr<add, ADDW, i32, i32>;
+def : PatGprGpr<sub, SUBW, i32, i32>;
+def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>;
+def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>;
+def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>;
+
+def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
+          (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
+
+def : PatGprImm<shl, SLLIW, uimm5, i32>;
+def : PatGprImm<srl, SRLIW, uimm5, i32>;
+def : PatGprImm<sra, SRAIW, uimm5, i32>;
+}
+
+let Predicates = [IsRV64, NotHasStdExtZba] in {
+def : Pat<(zext GPR:$src), (SRLI (SLLI GPR:$src, 32), 32)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index c43af14bb7f7005..5a3d393bdb599e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -372,3 +372,61 @@ def : Pat<(int_riscv_masked_cmpxchg_i64
           (PseudoMaskedCmpXchg32
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
 } // Predicates = [HasStdExtA, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+class PatGprGprA<SDPatternOperator OpNode, RVInst Inst, ValueType vt>
+    : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+
+multiclass AMOPat2<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+                   list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+                   !cast<RVInst>(BaseInst#"_AQ"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+                   !cast<RVInst>(BaseInst#"_RL"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                   !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                   !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                   !cast<RVInst>(BaseInst), vt>;
+}
+}
+
+defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>;
+defm : AMOPat2<"atomic_load_add_32", "AMOADD_W", i32>;
+defm : AMOPat2<"atomic_load_and_32", "AMOAND_W", i32>;
+defm : AMOPat2<"atomic_load_or_32", "AMOOR_W", i32>;
+defm : AMOPat2<"atomic_load_xor_32", "AMOXOR_W", i32>;
+defm : AMOPat2<"atomic_load_max_32", "AMOMAX_W", i32>;
+defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
+defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
+defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
+
+let Predicates = [HasAtomicLdSt] in {
+  def : LdPat<atomic_load_8,  LB, i32>;
+  def : LdPat<atomic_load_16, LH, i32>;
+  def : LdPat<atomic_load_32, LW, i32>;
+
+  def : StPat<atomic_store_8,  SB, GPR, i32>;
+  def : StPat<atomic_store_16, SH, GPR, i32>;
+  def : StPat<atomic_store_32, SW, GPR, i32>;
+}
+

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 34becfafe77473d..f3794c8a0433b1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -538,7 +538,7 @@ def SplitF64Pseudo_INX
              [(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>;
 } // Predicates = [HasStdExtZdinx, IsRV32]
 
-let Predicates = [HasStdExtD, IsRV32] in {
+let Predicates = [HasStdExtD] in {
 
 // double->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>;
@@ -557,7 +557,7 @@ def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>;
 // [u]int->double.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1, FRM_RNE)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1, FRM_RNE)>;
-} // Predicates = [HasStdExtD, IsRV32]
+} // Predicates = [HasStdExtD]
 
 let Predicates = [HasStdExtZdinx, IsRV32] in {
 

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 3a5794bb2d19474..32a66882fcd54d4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -680,19 +680,19 @@ def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm
           (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
 } // Predicates = [HasStdExtZfinx]
 
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
 // Moves (no conversion)
 def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
 def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
 
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
 // Moves (no conversion)
 def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
 def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
 
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
 // float->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>;
@@ -710,9 +710,9 @@ def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>;
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
 
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
 // float->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>;
@@ -730,7 +730,7 @@ def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>;
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
 
 let Predicates = [HasStdExtF, IsRV64] in {
 // Moves (no conversion)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 6c3c9a771d94b62..f9890ca4b0eec15 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -114,3 +114,18 @@ let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in {
 def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
           (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
 } // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
+def : PatGprGpr<mul, MULW, i32, i32>;
+}
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : PatGprGpr<sdiv, DIVW, i32, i32>;
+def : PatGprGpr<udiv, DIVUW, i32, i32>;
+def : PatGprGpr<srem, REMW, i32, i32>;
+def : PatGprGpr<urem, REMUW, i32, i32>;
+}

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 41e139e3c7a9ebe..1d44b1ad26364e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -886,9 +886,7 @@ defm : StoreUpdatePat<post_truncsti8, TH_SBIA>;
 defm : StoreUpdatePat<pre_truncsti8, TH_SBIB>;
 defm : StoreUpdatePat<post_truncsti16, TH_SHIA>;
 defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>;
-}
 
-let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
 defm : StoreUpdatePat<post_store, TH_SWIA, i32>;
 defm : StoreUpdatePat<pre_store, TH_SWIB, i32>;
 }
@@ -899,3 +897,15 @@ defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>;
 defm : StoreUpdatePat<post_store, TH_SDIA, i64>;
 defm : StoreUpdatePat<pre_store, TH_SDIB, i64>;
 }
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : StoreUpdatePat<post_truncsti8, TH_SBIA, i32>;
+defm : StoreUpdatePat<pre_truncsti8, TH_SBIB, i32>;
+defm : StoreUpdatePat<post_truncsti16, TH_SHIA, i32>;
+defm : StoreUpdatePat<pre_truncsti16, TH_SHIB, i32>;
+}
+

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 4a62a61dadcf3bb..fec6396c602baad 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -812,3 +812,29 @@ let Predicates = [HasStdExtZbkx] in {
 def : PatGprGpr<int_riscv_xperm4, XPERM4>;
 def : PatGprGpr<int_riscv_xperm8, XPERM8>;
 } // Predicates = [HasStdExtZbkx]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<ctlz, CLZW, i32>;
+def : PatGpr<cttz, CTZW, i32>;
+def : PatGpr<ctpop, CPOPW, i32>;
+
+def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
+def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
+def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>;
+def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>;
+def : PatGprImm<rotr, RORIW, uimm5, i32>;
+
+def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)),
+          (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>;
+}

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
index f3809f2abff695b..d819033eea68c70 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -62,7 +62,7 @@ def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
 def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
 } // Predicates = [HasStdExtZfbfmin]
 
-let Predicates = [HasStdExtZfbfmin, IsRV32] in {
+let Predicates = [HasStdExtZfbfmin] in {
 // bf16->[u]int. Round-to-zero must be used for the f32->int step, the
 // rounding mode has no effect for bf16->f32.
 def : Pat<(i32 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 1dc391d3f084fec..19d467f3b344c2a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -461,7 +461,7 @@ def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src,
 def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxOrZhinxmin]
 
-let Predicates = [HasStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfh] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>;
 def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>;
@@ -479,9 +479,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>;
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>;
 def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfh]
 
-let Predicates = [HasStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinx] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>;
 def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>;
@@ -499,7 +499,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>;
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinx]
 
 let Predicates = [HasStdExtZfh, IsRV64] in {
 // Use target specific isd nodes to help us remember the result is sign
@@ -597,7 +597,7 @@ def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2),
 def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64]
 
-let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
@@ -611,9 +611,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
 def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfhmin, NoStdExtZfh]
 
-let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
@@ -627,7 +627,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FR
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx]
 
 let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
 // half->[u]int64. Round-to-zero must be used.

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
new file mode 100644
index 000000000000000..e4eca5c491edb18
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+
+; These tests are each targeted at a particular RISC-V ALU instruction. Most
+; other files in this folder exercise LLVM IR instructions that don't directly
+; match a RISC-V instruction.
+
+; Register-immediate instructions.
+
+define i32 @addi(i32 %a) nounwind {
+; RV64I-LABEL: addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addiw a0, a0, 1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+define i32 @slti(i32 %a) nounwind {
+; RV64I-LABEL: slti:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 2
+; RV64I-NEXT:    ret
+  %1 = icmp slt i32 %a, 2
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @sltiu(i32 %a) nounwind {
+; RV64I-LABEL: sltiu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltiu a0, a0, 3
+; RV64I-NEXT:    ret
+  %1 = icmp ult i32 %a, 3
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @xori(i32 %a) nounwind {
+; RV64I-LABEL: xori:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xori a0, a0, 4
+; RV64I-NEXT:    ret
+  %1 = xor i32 %a, 4
+  ret i32 %1
+}
+
+define i32 @ori(i32 %a) nounwind {
+; RV64I-LABEL: ori:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ori a0, a0, 5
+; RV64I-NEXT:    ret
+  %1 = or i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @andi(i32 %a) nounwind {
+; RV64I-LABEL: andi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 6
+; RV64I-NEXT:    ret
+  %1 = and i32 %a, 6
+  ret i32 %1
+}
+
+define i32 @slli(i32 %a) nounwind {
+; RV64I-LABEL: slli:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slliw a0, a0, 7
+; RV64I-NEXT:    ret
+  %1 = shl i32 %a, 7
+  ret i32 %1
+}
+
+define i32 @srli(i32 %a) nounwind {
+; RV64I-LABEL: srli:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 8
+; RV64I-NEXT:    ret
+  %1 = lshr i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @srai(i32 %a) nounwind {
+; RV64I-LABEL: srai:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a0, a0, 9
+; RV64I-NEXT:    ret
+  %1 = ashr i32 %a, 9
+  ret i32 %1
+}
+
+; Register-register instructions
+
+define i32 @add(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sub(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = sub i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sub_negative_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sub_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -2
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = sub i32 -2, %a
+  ret i32 %1
+}
+
+define i32 @sll(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sll:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = shl i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sll_negative_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sll_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sllw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = shl i32 -1, %a
+  ret i32 %1
+}
+
+define i32 @slt(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: slt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slt a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = icmp slt i32 %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @sltu(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: sltu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = icmp ult i32 %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @xor(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: xor:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = xor i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srl(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: srl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = lshr i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srl_negative_constant_lhs(i32 %a) nounwind {
+;
+; RV64I-LABEL: srl_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    srlw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = lshr i32 -1, %a
+  ret i32 %1
+}
+
+define i32 @sra(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: sra:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = ashr i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sra_negative_constant_lhs(i32 %a) nounwind {
+;
+; RV64I-LABEL: sra_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    sraw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = ashr i32 2147483648, %a
+  ret i32 %1
+}
+
+define i32 @or(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: or:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = or i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @and(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: and:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = and i32 %a, %b
+  ret i32 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
new file mode 100644
index 000000000000000..f2228e9013ce9f1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
@@ -0,0 +1,699 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64IM %s
+
+define i32 @udiv(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: udiv:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @udiv_constant(i32 %a) nounwind {
+; RV64I-LABEL: udiv_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    lui a1, 838861
+; RV64IM-NEXT:    addi a1, a1, -819
+; RV64IM-NEXT:    slli a1, a1, 32
+; RV64IM-NEXT:    mulhu a0, a0, a1
+; RV64IM-NEXT:    srli a0, a0, 34
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @udiv_pow2(i32 %a) nounwind {
+; RV64I-LABEL: udiv_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @udiv_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: udiv_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 10, %a
+  ret i32 %1
+}
+
+define i64 @udiv64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: udiv64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @udiv64_constant(i64 %a) nounwind {
+; RV64I-LABEL: udiv64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a1, 838861
+; RV64IM-NEXT:    addiw a1, a1, -819
+; RV64IM-NEXT:    slli a2, a1, 32
+; RV64IM-NEXT:    add a1, a1, a2
+; RV64IM-NEXT:    mulhu a0, a0, a1
+; RV64IM-NEXT:    srli a0, a0, 2
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 %a, 5
+  ret i64 %1
+}
+
+define i64 @udiv64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: udiv64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divu a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 10, %a
+  ret i64 %1
+}
+
+define i8 @udiv8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: udiv8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a1, a1, 255
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @udiv8_constant(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 205
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a0, a0, 10
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, 5
+  ret i8 %1
+}
+
+define i8 @udiv8_pow2(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 248
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 248
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, 8
+  ret i8 %1
+}
+
+define i8 @udiv8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 10, %a
+  ret i8 %1
+}
+
+define i16 @udiv16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: udiv16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a2, 16
+; RV64IM-NEXT:    addi a2, a2, -1
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @udiv16_constant(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    lui a1, 13
+; RV64IM-NEXT:    addi a1, a1, -819
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a0, a0, 18
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, 5
+  ret i16 %1
+}
+
+define i16 @udiv16_pow2(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, 8
+  ret i16 %1
+}
+
+define i16 @udiv16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a1, a0, 48
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 10, %a
+  ret i16 %1
+}
+
+define i32 @sdiv(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sdiv:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sdiv_constant(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a0, a0
+; RV64IM-NEXT:    lui a1, 419430
+; RV64IM-NEXT:    addiw a1, a1, 1639
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 63
+; RV64IM-NEXT:    srai a0, a0, 33
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @sdiv_pow2(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    sraiw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_pow2_2(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_pow2_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_pow2_2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 65536
+  ret i32 %1
+}
+
+define i32 @sdiv_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 -10, %a
+  ret i32 %1
+}
+
+define i64 @sdiv64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: sdiv64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    div a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @sdiv64_constant(i64 %a) nounwind {
+; RV64I-LABEL: sdiv64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV64IM-NEXT:    ld a1, %lo(.LCPI21_0)(a1)
+; RV64IM-NEXT:    mulh a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 63
+; RV64IM-NEXT:    srai a0, a0, 1
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 %a, 5
+  ret i64 %1
+}
+
+define i64 @sdiv64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: sdiv64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    div a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 10, %a
+  ret i64 %1
+}
+
+; Although this sdiv has two sexti32 operands, it shouldn't compile to divw on
+; RV64M as that wouldn't produce the correct result for e.g. INT_MIN/-1.
+
+define i64 @sdiv64_sext_operands(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sdiv64_sext_operands:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_sext_operands:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a0, a0
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    div a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %3 = sdiv i64 %1, %2
+  ret i64 %3
+}
+
+define i8 @sdiv8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: sdiv8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @sdiv8_constant(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, 103
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    sraiw a1, a0, 9
+; RV64IM-NEXT:    srliw a0, a0, 15
+; RV64IM-NEXT:    andi a0, a0, 1
+; RV64IM-NEXT:    addw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, 5
+  ret i8 %1
+}
+
+define i8 @sdiv8_pow2(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    srliw a1, a1, 12
+; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 27
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    srliw a1, a1, 12
+; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 27
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, 8
+  ret i8 %1
+}
+
+define i8 @sdiv8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a1, a0, 24
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 -10, %a
+  ret i8 %1
+}
+
+define i16 @sdiv16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: sdiv16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @sdiv16_constant(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    lui a1, 6
+; RV64IM-NEXT:    addi a1, a1, 1639
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a1, a0, 31
+; RV64IM-NEXT:    sraiw a0, a0, 17
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, 5
+  ret i16 %1
+}
+
+define i16 @sdiv16_pow2(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    srliw a1, a1, 28
+; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 19
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    srliw a1, a1, 28
+; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 19
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, 8
+  ret i16 %1
+}
+
+define i16 @sdiv16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a1, a0, 16
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 -10, %a
+  ret i16 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
new file mode 100644
index 000000000000000..0ef17ca964db567
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
@@ -0,0 +1,2564 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=RV64I,RV64-NOPOOL
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=RV64I,RV64I-POOL
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zba \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBA
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbb \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBB
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbs \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB
+
+; Materializing constants
+
+; TODO: It would be preferable if anyext constant returns were sign rather
+; than zero extended. See PR39092. For now, mark returns as explicitly signext
+; (this matches what Clang would generate for equivalent C/C++ anyway).
+
+define signext i32 @zero() nounwind {
+; RV64I-LABEL: zero:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: zero:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: zero:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 0
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: zero:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 0
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: zero:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 0
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 0
+}
+
+define signext i32 @pos_small() nounwind {
+; RV64I-LABEL: pos_small:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 2047
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_small:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 2047
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_small:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 2047
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_small:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 2047
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 2047
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2047
+}
+
+define signext i32 @neg_small() nounwind {
+; RV64I-LABEL: neg_small:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -2048
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_small:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -2048
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_small:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -2048
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_small:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -2048
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2048
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -2048
+}
+
+define signext i32 @pos_i32() nounwind {
+; RV64I-LABEL: pos_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 423811
+; RV64I-NEXT:    addiw a0, a0, -1297
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_i32:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 423811
+; RV64IZBA-NEXT:    addiw a0, a0, -1297
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_i32:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 423811
+; RV64IZBB-NEXT:    addiw a0, a0, -1297
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_i32:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 423811
+; RV64IZBS-NEXT:    addiw a0, a0, -1297
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 423811
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1297
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 1735928559
+}
+
+define signext i32 @neg_i32() nounwind {
+; RV64I-LABEL: neg_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 912092
+; RV64I-NEXT:    addiw a0, a0, -273
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_i32:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 912092
+; RV64IZBA-NEXT:    addiw a0, a0, -273
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_i32:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 912092
+; RV64IZBB-NEXT:    addiw a0, a0, -273
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_i32:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 912092
+; RV64IZBS-NEXT:    addiw a0, a0, -273
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 912092
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -559038737
+}
+
+define signext i32 @pos_i32_hi20_only() nounwind {
+; RV64I-LABEL: pos_i32_hi20_only:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_i32_hi20_only:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 16
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_i32_hi20_only:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_i32_hi20_only:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 65536 ; 0x10000
+}
+
+define signext i32 @neg_i32_hi20_only() nounwind {
+; RV64I-LABEL: neg_i32_hi20_only:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048560
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_i32_hi20_only:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 1048560
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_i32_hi20_only:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048560
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_i32_hi20_only:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1048560
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048560
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -65536 ; -0x10000
+}
+
+; This can be materialized with ADDI+SLLI, improving compressibility.
+
+define signext i32 @imm_left_shifted_addi() nounwind {
+; RV64I-LABEL: imm_left_shifted_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 32
+; RV64I-NEXT:    addiw a0, a0, -64
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 32
+; RV64IZBA-NEXT:    addiw a0, a0, -64
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 32
+; RV64IZBB-NEXT:    addiw a0, a0, -64
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 32
+; RV64IZBS-NEXT:    addiw a0, a0, -64
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 32
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -64
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 131008 ; 0x1FFC0
+}
+
+; This can be materialized with ADDI+SRLI, improving compressibility.
+
+define signext i32 @imm_right_shifted_addi() nounwind {
+; RV64I-LABEL: imm_right_shifted_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 524288
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524288
+; RV64IZBA-NEXT:    addiw a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524288
+; RV64IZBB-NEXT:    addiw a0, a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 524288
+; RV64IZBS-NEXT:    addiw a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2147483647 ; 0x7FFFFFFF
+}
+
+; This can be materialized with LUI+SRLI, improving compressibility.
+
+define signext i32 @imm_right_shifted_lui() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 56
+; RV64I-NEXT:    addiw a0, a0, 580
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 56
+; RV64IZBA-NEXT:    addiw a0, a0, 580
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 56
+; RV64IZBB-NEXT:    addiw a0, a0, 580
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 56
+; RV64IZBS-NEXT:    addiw a0, a0, 580
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 56
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 580
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 229956 ; 0x38244
+}
+
+define i64 @imm64_1() nounwind {
+; RV64I-LABEL: imm64_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 31
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 31
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147483648 ; 0x8000_0000
+}
+
+define i64 @imm64_2() nounwind {
+; RV64I-LABEL: imm64_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    srli a0, a0, 32
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    srli a0, a0, 32
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -1
+; RV64IZBS-NEXT:    srli a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294967295 ; 0xFFFF_FFFF
+}
+
+define i64 @imm64_3() nounwind {
+; RV64I-LABEL: imm64_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 32
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 32
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294967296 ; 0x1_0000_0000
+}
+
+define i64 @imm64_4() nounwind {
+; RV64I-LABEL: imm64_4:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_4:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    slli a0, a0, 63
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_4:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    slli a0, a0, 63
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_4:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_4:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223372036854775808 ; 0x8000_0000_0000_0000
+}
+
+define i64 @imm64_5() nounwind {
+; RV64I-LABEL: imm64_5:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_5:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    slli a0, a0, 63
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_5:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    slli a0, a0, 63
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_5:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_5:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000
+}
+
+define i64 @imm64_6() nounwind {
+; RV64I-LABEL: imm64_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 9321
+; RV64I-NEXT:    addi a0, a0, -1329
+; RV64I-NEXT:    slli a0, a0, 35
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_6:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 9321
+; RV64IZBA-NEXT:    addi a0, a0, -1329
+; RV64IZBA-NEXT:    slli a0, a0, 35
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_6:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 9321
+; RV64IZBB-NEXT:    addi a0, a0, -1329
+; RV64IZBB-NEXT:    slli a0, a0, 35
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_6:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 9321
+; RV64IZBS-NEXT:    addi a0, a0, -1329
+; RV64IZBS-NEXT:    slli a0, a0, 35
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_6:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 9321
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1329
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
+}
+
+define i64 @imm64_7() nounwind {
+; RV64I-LABEL: imm64_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 7
+; RV64I-NEXT:    slli a0, a0, 36
+; RV64I-NEXT:    addi a0, a0, 11
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_7:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 7
+; RV64IZBA-NEXT:    slli a0, a0, 36
+; RV64IZBA-NEXT:    addi a0, a0, 11
+; RV64IZBA-NEXT:    slli a0, a0, 24
+; RV64IZBA-NEXT:    addi a0, a0, 15
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_7:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 7
+; RV64IZBB-NEXT:    slli a0, a0, 36
+; RV64IZBB-NEXT:    addi a0, a0, 11
+; RV64IZBB-NEXT:    slli a0, a0, 24
+; RV64IZBB-NEXT:    addi a0, a0, 15
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_7:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 7
+; RV64IZBS-NEXT:    slli a0, a0, 36
+; RV64IZBS-NEXT:    addi a0, a0, 11
+; RV64IZBS-NEXT:    slli a0, a0, 24
+; RV64IZBS-NEXT:    addi a0, a0, 15
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_7:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 7
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 11
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 15
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F
+}
+
+; TODO: it can be preferable to put constants that are expensive to materialise
+; into the constant pool, especially for -Os.
+define i64 @imm64_8() nounwind {
+; RV64-NOPOOL-LABEL: imm64_8:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 583
+; RV64-NOPOOL-NEXT:    addiw a0, a0, -1875
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -947
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, 1511
+; RV64-NOPOOL-NEXT:    slli a0, a0, 13
+; RV64-NOPOOL-NEXT:    addi a0, a0, -272
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm64_8:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI17_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_8:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 596523
+; RV64IZBA-NEXT:    addi a0, a0, 965
+; RV64IZBA-NEXT:    slli.uw a0, a0, 13
+; RV64IZBA-NEXT:    addi a0, a0, -1347
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -529
+; RV64IZBA-NEXT:    slli a0, a0, 4
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_8:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 583
+; RV64IZBB-NEXT:    addiw a0, a0, -1875
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -947
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1511
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -272
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_8:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 583
+; RV64IZBS-NEXT:    addiw a0, a0, -1875
+; RV64IZBS-NEXT:    slli a0, a0, 14
+; RV64IZBS-NEXT:    addi a0, a0, -947
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1511
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -272
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_8:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 583
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1875
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -947
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1511
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -272
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0
+}
+
+define i64 @imm64_9() nounwind {
+; RV64I-LABEL: imm64_9:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_9:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_9:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_9:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_9:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1
+}
+
+; Various cases where extraneous ADDIs can be inserted where a (left shifted)
+; LUI suffices.
+
+define i64 @imm_left_shifted_lui_1() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 262145
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 262145
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 262145
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 262145
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147491840 ; 0x8000_2000
+}
+
+define i64 @imm_left_shifted_lui_2() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 262145
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 262145
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 262145
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 262145
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294983680 ; 0x1_0000_4000
+}
+
+define i64 @imm_left_shifted_lui_3() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 4097
+; RV64I-NEXT:    slli a0, a0, 20
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 4097
+; RV64IZBA-NEXT:    slli a0, a0, 20
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 4097
+; RV64IZBB-NEXT:    slli a0, a0, 20
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 4097
+; RV64IZBS-NEXT:    slli a0, a0, 20
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 17596481011712 ; 0x1001_0000_0000
+}
+
+; Various cases where extraneous ADDIs can be inserted where a (right shifted)
+; LUI suffices, or where multiple ADDIs can be used instead of a single LUI.
+
+define i64 @imm_right_shifted_lui_1() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 983056
+; RV64I-NEXT:    srli a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 983056
+; RV64IZBA-NEXT:    srli a0, a0, 16
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 983056
+; RV64IZBB-NEXT:    srli a0, a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 983056
+; RV64IZBS-NEXT:    srli a0, a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 983056
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 281474976706561 ; 0xFFFF_FFFF_F001
+}
+
+define i64 @imm_right_shifted_lui_2() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1044481
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    srli a0, a0, 24
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 1044481
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    srli a0, a0, 24
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1044481
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    srli a0, a0, 24
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1044481
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    srli a0, a0, 24
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1044481
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1099511623681 ; 0xFF_FFFF_F001
+}
+
+; We can materialize the upper bits with a single (shifted) LUI, but that option
+; can be missed due to the lower bits, which aren't just 1s or just 0s.
+
+define i64 @imm_decoupled_lui_addi() nounwind {
+; RV64I-LABEL: imm_decoupled_lui_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 4097
+; RV64I-NEXT:    slli a0, a0, 20
+; RV64I-NEXT:    addi a0, a0, -3
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_decoupled_lui_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 4097
+; RV64IZBA-NEXT:    slli a0, a0, 20
+; RV64IZBA-NEXT:    addi a0, a0, -3
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_decoupled_lui_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 4097
+; RV64IZBB-NEXT:    slli a0, a0, 20
+; RV64IZBB-NEXT:    addi a0, a0, -3
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_decoupled_lui_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 4097
+; RV64IZBS-NEXT:    slli a0, a0, 20
+; RV64IZBS-NEXT:    addi a0, a0, -3
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_decoupled_lui_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -3
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 17596481011709 ; 0x1000_FFFF_FFFD
+}
+
+; This constant can be materialized for RV64 with LUI+SRLI+XORI.
+
+define i64 @imm_end_xori_1() nounwind {
+; RV64I-LABEL: imm_end_xori_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 983040
+; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_end_xori_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 983040
+; RV64IZBA-NEXT:    srli a0, a0, 3
+; RV64IZBA-NEXT:    not a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_end_xori_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 983040
+; RV64IZBB-NEXT:    srli a0, a0, 3
+; RV64IZBB-NEXT:    not a0, a0
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_end_xori_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 983040
+; RV64IZBS-NEXT:    srli a0, a0, 3
+; RV64IZBS-NEXT:    not a0, a0
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_xori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 983040
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 3
+; RV64IXTHEADBB-NEXT:    not a0, a0
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
+}
+
+; This constant can be materialized for RV64 with ADDI+SLLI+ADDI+ADDI.
+
+define i64 @imm_end_2addi_1() nounwind {
+; RV64I-LABEL: imm_end_2addi_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -2047
+; RV64I-NEXT:    slli a0, a0, 39
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_end_2addi_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -2047
+; RV64IZBA-NEXT:    slli a0, a0, 39
+; RV64IZBA-NEXT:    addi a0, a0, -2048
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_end_2addi_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -2047
+; RV64IZBB-NEXT:    slli a0, a0, 39
+; RV64IZBB-NEXT:    addi a0, a0, -2048
+; RV64IZBB-NEXT:    addi a0, a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_end_2addi_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -2047
+; RV64IZBS-NEXT:    slli a0, a0, 39
+; RV64IZBS-NEXT:    addi a0, a0, -2048
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_2addi_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2047
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 39
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
+}
+
+; This constant can be more efficiently materialized for RV64 if we use two
+; registers instead of one.
+
+define i64 @imm_2reg_1() nounwind {
+; RV64I-LABEL: imm_2reg_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 74565
+; RV64I-NEXT:    addiw a0, a0, 1656
+; RV64I-NEXT:    slli a1, a0, 57
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2reg_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 74565
+; RV64IZBA-NEXT:    addiw a0, a0, 1656
+; RV64IZBA-NEXT:    slli a1, a0, 57
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2reg_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 74565
+; RV64IZBB-NEXT:    addiw a0, a0, 1656
+; RV64IZBB-NEXT:    slli a1, a0, 57
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2reg_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 74565
+; RV64IZBS-NEXT:    addiw a0, a0, 1656
+; RV64IZBS-NEXT:    slli a1, a0, 57
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2reg_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 74565
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1656
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 57
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
+}
+
+; FIXME: This should use a single ADDI for the immediate.
+define void @imm_store_i16_neg1(ptr %p) nounwind {
+; RV64I-LABEL: imm_store_i16_neg1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sh a1, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_store_i16_neg1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a1, -1
+; RV64IZBA-NEXT:    sh a1, 0(a0)
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_store_i16_neg1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a1, -1
+; RV64IZBB-NEXT:    sh a1, 0(a0)
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_store_i16_neg1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a1, -1
+; RV64IZBS-NEXT:    sh a1, 0(a0)
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i16_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sh a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
+  store i16 -1, ptr %p
+  ret void
+}
+
+; FIXME: This should use a single ADDI for the immediate.
+define void @imm_store_i32_neg1(ptr %p) nounwind {
+; RV64I-LABEL: imm_store_i32_neg1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_store_i32_neg1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a1, -1
+; RV64IZBA-NEXT:    sw a1, 0(a0)
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_store_i32_neg1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a1, -1
+; RV64IZBB-NEXT:    sw a1, 0(a0)
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_store_i32_neg1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a1, -1
+; RV64IZBS-NEXT:    sw a1, 0(a0)
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i32_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sw a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
+  store i32 -1, ptr %p
+  ret void
+}
+
+define i64 @imm_5372288229() {
+; RV64I-LABEL: imm_5372288229:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 160
+; RV64I-NEXT:    addiw a0, a0, 437
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -795
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_5372288229:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 655797
+; RV64IZBA-NEXT:    slli.uw a0, a0, 1
+; RV64IZBA-NEXT:    addi a0, a0, -795
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_5372288229:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 160
+; RV64IZBB-NEXT:    addiw a0, a0, 437
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -795
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_5372288229:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 263018
+; RV64IZBS-NEXT:    addiw a0, a0, -795
+; RV64IZBS-NEXT:    bseti a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 160
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -795
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 5372288229
+}
+
+define i64 @imm_neg_5372288229() {
+; RV64I-LABEL: imm_neg_5372288229:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048416
+; RV64I-NEXT:    addiw a0, a0, -437
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, 795
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_5372288229:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_5372288229:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048416
+; RV64IZBB-NEXT:    addiw a0, a0, -437
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, 795
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_5372288229:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 785558
+; RV64IZBS-NEXT:    addiw a0, a0, 795
+; RV64IZBS-NEXT:    bclri a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048416
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 795
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5372288229
+}
+
+define i64 @imm_8953813715() {
+; RV64I-LABEL: imm_8953813715:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 267
+; RV64I-NEXT:    addiw a0, a0, -637
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -1325
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_8953813715:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 437198
+; RV64IZBA-NEXT:    addiw a0, a0, -265
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_8953813715:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 267
+; RV64IZBB-NEXT:    addiw a0, a0, -637
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -1325
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_8953813715:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 88838
+; RV64IZBS-NEXT:    addiw a0, a0, -1325
+; RV64IZBS-NEXT:    bseti a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 267
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1325
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 8953813715
+}
+
+define i64 @imm_neg_8953813715() {
+; RV64I-LABEL: imm_neg_8953813715:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048309
+; RV64I-NEXT:    addiw a0, a0, 637
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, 1325
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_8953813715:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_8953813715:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048309
+; RV64IZBB-NEXT:    addiw a0, a0, 637
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, 1325
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_8953813715:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 959738
+; RV64IZBS-NEXT:    addiw a0, a0, 1325
+; RV64IZBS-NEXT:    bclri a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048309
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1325
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -8953813715
+}
+
+define i64 @imm_16116864687() {
+; RV64I-LABEL: imm_16116864687:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 961
+; RV64I-NEXT:    addiw a0, a0, -1475
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1711
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_16116864687:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 437198
+; RV64IZBA-NEXT:    addiw a0, a0, -265
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_16116864687:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 961
+; RV64IZBB-NEXT:    addiw a0, a0, -1475
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1711
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_16116864687:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 961
+; RV64IZBS-NEXT:    addiw a0, a0, -1475
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1711
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 961
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1711
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 16116864687
+}
+
+define i64 @imm_neg_16116864687() {
+; RV64I-LABEL: imm_neg_16116864687:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1047615
+; RV64I-NEXT:    addiw a0, a0, 1475
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, -1711
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_16116864687:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_16116864687:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1047615
+; RV64IZBB-NEXT:    addiw a0, a0, 1475
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -1711
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_16116864687:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1047615
+; RV64IZBS-NEXT:    addiw a0, a0, 1475
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, -1711
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047615
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1711
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -16116864687
+}
+
+define i64 @imm_2344336315() {
+; RV64I-LABEL: imm_2344336315:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 143087
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2344336315:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 143087
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2344336315:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 143087
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2344336315:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 143087
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2344336315:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 143087
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2344336315 ; 0x8bbbbbbb
+}
+
+define i64 @imm_70370820078523() {
+; RV64-NOPOOL-LABEL: imm_70370820078523:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 256
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 31
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_70370820078523:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI37_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI37_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_70370820078523:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 256
+; RV64IZBA-NEXT:    addiw a0, a0, 31
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_70370820078523:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 256
+; RV64IZBB-NEXT:    addiw a0, a0, 31
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_70370820078523:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 46
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_70370820078523:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 256
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 70370820078523 ; 0x40007bbbbbbb
+}
+
+define i64 @imm_neg_9223372034778874949() {
+; RV64I-LABEL: imm_neg_9223372034778874949:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 506812
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223372034778874949:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 506812
+; RV64IZBA-NEXT:    addiw a0, a0, -1093
+; RV64IZBA-NEXT:    slli a1, a0, 63
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223372034778874949:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 506812
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a1, a0, 63
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223372034778874949:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 506812
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
+}
+
+define i64 @imm_neg_9223301666034697285() {
+; RV64-NOPOOL-LABEL: imm_neg_9223301666034697285:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 917505
+; RV64-NOPOOL-NEXT:    slli a0, a0, 8
+; RV64-NOPOOL-NEXT:    addi a0, a0, 31
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_neg_9223301666034697285:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI39_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI39_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223301666034697285:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 917505
+; RV64IZBA-NEXT:    slli a0, a0, 8
+; RV64IZBA-NEXT:    addi a0, a0, 31
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223301666034697285:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 917505
+; RV64IZBB-NEXT:    slli a0, a0, 8
+; RV64IZBB-NEXT:    addi a0, a0, 31
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223301666034697285:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 46
+; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223301666034697285:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917505
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 8
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
+}
+
+define i64 @imm_neg_2219066437() {
+; RV64I-LABEL: imm_neg_2219066437:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 913135
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2219066437:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 913135
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2219066437:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 913135
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2219066437:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 913135
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2219066437:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 913135
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2219066437 ; 0xffffffff7bbbbbbb
+}
+
+define i64 @imm_neg_8798043653189() {
+; RV64I-LABEL: imm_neg_8798043653189:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 917475
+; RV64I-NEXT:    addiw a0, a0, -273
+; RV64I-NEXT:    slli a0, a0, 14
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_8798043653189:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 917475
+; RV64IZBA-NEXT:    addiw a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_8798043653189:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 917475
+; RV64IZBB-NEXT:    addiw a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_8798043653189:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 43
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8798043653189:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917475
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
+}
+
+define i64 @imm_9223372034904144827() {
+; RV64I-LABEL: imm_9223372034904144827:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 572348
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_9223372034904144827:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 572348
+; RV64IZBA-NEXT:    addiw a0, a0, -1093
+; RV64IZBA-NEXT:    slli a1, a0, 63
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_9223372034904144827:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 572348
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a1, a0, 63
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_9223372034904144827:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 572348
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
+}
+
+define i64 @imm_neg_9223354442718100411() {
+; RV64-NOPOOL-LABEL: imm_neg_9223354442718100411:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 524287
+; RV64-NOPOOL-NEXT:    slli a0, a0, 6
+; RV64-NOPOOL-NEXT:    addi a0, a0, -29
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_neg_9223354442718100411:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI43_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223354442718100411:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524287
+; RV64IZBA-NEXT:    slli a0, a0, 6
+; RV64IZBA-NEXT:    addi a0, a0, -29
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223354442718100411:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524287
+; RV64IZBB-NEXT:    slli a0, a0, 6
+; RV64IZBB-NEXT:    addi a0, a0, -29
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223354442718100411:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 44
+; RV64IZBS-NEXT:    bclri a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223354442718100411:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524287
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 6
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -29
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
+}
+
+define i64 @imm_2863311530() {
+; RV64I-LABEL: imm_2863311530:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 349525
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2863311530:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    addiw a0, a0, 1365
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2863311530:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 349525
+; RV64IZBB-NEXT:    addiw a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2863311530:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 349525
+; RV64IZBS-NEXT:    addiw a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 349525
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+	ret i64 2863311530 ; #0xaaaaaaaa
+}
+
+define i64 @imm_neg_2863311530() {
+; RV64I-LABEL: imm_neg_2863311530:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 699051
+; RV64I-NEXT:    addiw a0, a0, -1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2863311530:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 699051
+; RV64IZBA-NEXT:    addiw a0, a0, -1365
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2863311530:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 699051
+; RV64IZBB-NEXT:    addiw a0, a0, -1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2863311530:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 699051
+; RV64IZBS-NEXT:    addiw a0, a0, -1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 699051
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+	ret i64 -2863311530 ; #0xffffffff55555556
+}
+
+define i64 @imm_2147486378() {
+; RV64I-LABEL: imm_2147486378:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    addi a0, a0, 1365
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2147486378:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 31
+; RV64IZBA-NEXT:    addi a0, a0, 1365
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2147486378:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 31
+; RV64IZBB-NEXT:    addi a0, a0, 1365
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2147486378:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 1365
+; RV64IZBS-NEXT:    bseti a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2147486378:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147485013
+}
+
+define i64 @imm_neg_2147485013() {
+; RV64I-LABEL: imm_neg_2147485013:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 524288
+; RV64I-NEXT:    addi a0, a0, -1365
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2147485013:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524288
+; RV64IZBA-NEXT:    addi a0, a0, -1365
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2147485013:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524288
+; RV64IZBB-NEXT:    addi a0, a0, -1365
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2147485013:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 524288
+; RV64IZBS-NEXT:    addi a0, a0, -1365
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2147485013:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2147485013
+}
+
+define i64 @imm_12900924131259() {
+; RV64I-LABEL: imm_12900924131259:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 188
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    addi a0, a0, 1979
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900924131259:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 1979
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900924131259:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 188
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 24
+; RV64IZBB-NEXT:    addi a0, a0, 1979
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900924131259:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 188
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 24
+; RV64IZBS-NEXT:    addi a0, a0, 1979
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900924131259:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1979
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900924131259
+}
+
+define i64 @imm_50394234880() {
+; RV64I-LABEL: imm_50394234880:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 188
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_50394234880:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    slli.uw a0, a0, 4
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_50394234880:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 188
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_50394234880:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 188
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_50394234880:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 50394234880
+}
+
+define i64 @imm_12900936431479() {
+; RV64I-LABEL: imm_12900936431479:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 192239
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1911
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900936431479:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768956
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 1911
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900936431479:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 192239
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1911
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900936431479:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 192239
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1911
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900936431479:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 192239
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900936431479
+}
+
+define i64 @imm_12900918536874() {
+; RV64I-LABEL: imm_12900918536874:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 384477
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900918536874:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    addi a0, a0, -1365
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -1366
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900918536874:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 384477
+; RV64IZBB-NEXT:    addiw a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900918536874:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 384477
+; RV64IZBS-NEXT:    addiw a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900918536874:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 384477
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900918536874
+}
+
+define i64 @imm_12900925247761() {
+; RV64I-LABEL: imm_12900925247761:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 384478
+; RV64I-NEXT:    addiw a0, a0, -1911
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -1775
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900925247761:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    addi a0, a0, 273
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 273
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900925247761:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 384478
+; RV64IZBB-NEXT:    addiw a0, a0, -1911
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -2048
+; RV64IZBB-NEXT:    addi a0, a0, -1775
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900925247761:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 384478
+; RV64IZBS-NEXT:    addiw a0, a0, -1911
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -2048
+; RV64IZBS-NEXT:    addi a0, a0, -1775
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900925247761:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 384478
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1911
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1775
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900925247761
+}
+
+define i64 @imm_7158272001() {
+; RV64I-LABEL: imm_7158272001:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 427
+; RV64I-NEXT:    addiw a0, a0, -1367
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_7158272001:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_7158272001:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 427
+; RV64IZBB-NEXT:    addiw a0, a0, -1367
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_7158272001:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 427
+; RV64IZBS-NEXT:    addiw a0, a0, -1367
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_7158272001:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 427
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1367
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 7158272001 ; 0x0000_0001_aaaa_9001
+}
+
+define i64 @imm_12884889601() {
+; RV64I-LABEL: imm_12884889601:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 768
+; RV64I-NEXT:    addiw a0, a0, -3
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12884889601:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12884889601:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 768
+; RV64IZBB-NEXT:    addiw a0, a0, -3
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12884889601:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 768
+; RV64IZBS-NEXT:    addiw a0, a0, -3
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12884889601:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 768
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -3
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12884889601 ; 0x0000_0002_ffff_d001
+}
+
+define i64 @imm_neg_3435982847() {
+; RV64I-LABEL: imm_neg_3435982847:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048371
+; RV64I-NEXT:    addiw a0, a0, 817
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_3435982847:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_3435982847:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048371
+; RV64IZBB-NEXT:    addiw a0, a0, 817
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_3435982847:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 734001
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_3435982847:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048371
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 817
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -3435982847 ; 0xffff_ffff_3333_1001
+}
+
+define i64 @imm_neg_5726842879() {
+; RV64I-LABEL: imm_neg_5726842879:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048235
+; RV64I-NEXT:    addiw a0, a0, -1419
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_5726842879:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768945
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_5726842879:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048235
+; RV64IZBB-NEXT:    addiw a0, a0, -1419
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_5726842879:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 698997
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5726842879:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048235
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1419
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
+}
+
+define i64 @imm_neg_10307948543() {
+; RV64I-LABEL: imm_neg_10307948543:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1047962
+; RV64I-NEXT:    addiw a0, a0, -1645
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_10307948543:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_10307948543:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1047962
+; RV64IZBB-NEXT:    addiw a0, a0, -1645
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_10307948543:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 629139
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_10307948543:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047962
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1645
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -10307948543 ; 0xffff_fffd_9999_3001
+}
+
+define i64 @li_rori_1() {
+; RV64I-LABEL: li_rori_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -17
+; RV64I-NEXT:    slli a0, a0, 43
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -17
+; RV64IZBA-NEXT:    slli a0, a0, 43
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -18
+; RV64IZBB-NEXT:    rori a0, a0, 21
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -17
+; RV64IZBS-NEXT:    slli a0, a0, 43
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 21
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -149533581377537
+}
+
+define i64 @li_rori_2() {
+; RV64I-LABEL: li_rori_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -5
+; RV64I-NEXT:    slli a0, a0, 60
+; RV64I-NEXT:    addi a0, a0, -6
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -5
+; RV64IZBA-NEXT:    slli a0, a0, 60
+; RV64IZBA-NEXT:    addi a0, a0, -6
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -86
+; RV64IZBB-NEXT:    rori a0, a0, 4
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -5
+; RV64IZBS-NEXT:    slli a0, a0, 60
+; RV64IZBS-NEXT:    addi a0, a0, -6
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -86
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 4
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5764607523034234886
+}
+
+define i64 @li_rori_3() {
+; RV64I-LABEL: li_rori_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -17
+; RV64I-NEXT:    slli a0, a0, 27
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -17
+; RV64IZBA-NEXT:    slli a0, a0, 27
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -18
+; RV64IZBB-NEXT:    rori a0, a0, 37
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -17
+; RV64IZBS-NEXT:    slli a0, a0, 27
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 37
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2281701377
+}
+
+; This used to assert when compiled with Zba.
+define i64 @PR54812() {
+; RV64I-LABEL: PR54812:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048447
+; RV64I-NEXT:    addiw a0, a0, 1407
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: PR54812:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 872917
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: PR54812:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048447
+; RV64IZBB-NEXT:    addiw a0, a0, 1407
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: PR54812:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1045887
+; RV64IZBS-NEXT:    bclri a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: PR54812:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048447
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1407
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2158497792;
+}
+
+define signext i32 @pos_2048() nounwind {
+; RV64I-LABEL: pos_2048:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_2048:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 11
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_2048:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 11
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_2048:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 11
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_2048:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 11
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2048
+}
+
+define i64 @imm64_same_lo_hi() nounwind {
+; RV64I-LABEL: imm64_same_lo_hi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 65793
+; RV64I-NEXT:    addiw a0, a0, 16
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_same_lo_hi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 65793
+; RV64IZBA-NEXT:    addiw a0, a0, 16
+; RV64IZBA-NEXT:    slli a1, a0, 32
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_same_lo_hi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 65793
+; RV64IZBB-NEXT:    addiw a0, a0, 16
+; RV64IZBB-NEXT:    slli a1, a0, 32
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_same_lo_hi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 65793
+; RV64IZBS-NEXT:    addiw a0, a0, 16
+; RV64IZBS-NEXT:    slli a1, a0, 32
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_same_lo_hi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 65793
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 16
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1157442765409226768 ; 0x0101010101010101
+}
+
+; Same as above with optsize. Make sure we use constant pool on RV64
+define i64 @imm64_same_lo_hi_optsize() nounwind optsize {
+; RV64-NOPOOL-LABEL: imm64_same_lo_hi_optsize:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 65793
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 16
+; RV64-NOPOOL-NEXT:    slli a1, a0, 32
+; RV64-NOPOOL-NEXT:    add a0, a0, a1
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm64_same_lo_hi_optsize:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 65793
+; RV64IZBA-NEXT:    addiw a0, a0, 16
+; RV64IZBA-NEXT:    slli a1, a0, 32
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 65793
+; RV64IZBB-NEXT:    addiw a0, a0, 16
+; RV64IZBB-NEXT:    slli a1, a0, 32
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 65793
+; RV64IZBS-NEXT:    addiw a0, a0, 16
+; RV64IZBS-NEXT:    slli a1, a0, 32
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_same_lo_hi_optsize:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 65793
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 16
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1157442765409226768 ; 0x0101010101010101
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
new file mode 100644
index 000000000000000..456a880891f7309
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+
+; Check indexed and unindexed, sext, zext and anyext loads
+
+define void @lb(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lb:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lb a2, 1(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = sext i8 %2 to i32
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i8, ptr %a
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lbu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lbu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a0, 1(a0)
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = zext i8 %2 to i32
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lh(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lh:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lh a2, 2(a0)
+; RV64I-NEXT:    lh zero, 0(a0)
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 1
+  %2 = load i16, ptr %1
+  %3 = sext i16 %2 to i32
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i16, ptr %a
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lhu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lhu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lhu a0, 2(a0)
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 1
+  %2 = load i16, ptr %1
+  %3 = zext i16 %2 to i32
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lw(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw zero, 0(a0)
+; RV64I-NEXT:    sd a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i64 1
+  %2 = load i32, ptr %1
+  %3 = sext i32 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i32, ptr %a
+  store i64 %3, ptr %b
+  ret void
+}
+
+define void @lwu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lwu a0, 4(a0)
+; RV64I-NEXT:    sd a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i64 1
+  %2 = load i32, ptr %1
+  %3 = zext i32 %2 to i64
+  store i64 %3, ptr %b
+  ret void
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
new file mode 100644
index 000000000000000..76ab0e7d5810e70
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+
+; Check indexed and unindexed, sext, zext and anyext loads
+
+define dso_local i64 @lb(ptr %a) nounwind {
+; RV64I-LABEL: lb:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lb a1, 1(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = sext i8 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i8, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lh(ptr %a) nounwind {
+; RV64I-LABEL: lh:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lh a1, 4(a0)
+; RV64I-NEXT:    lh zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 2
+  %2 = load i16, ptr %1
+  %3 = sext i16 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i16, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lw(ptr %a) nounwind {
+; RV64I-LABEL: lw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    lw zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 3
+  %2 = load i32, ptr %1
+  %3 = sext i32 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i32, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lbu(ptr %a) nounwind {
+; RV64I-LABEL: lbu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 4(a0)
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 4
+  %2 = load i8, ptr %1
+  %3 = zext i8 %2 to i64
+  %4 = load volatile i8, ptr %a
+  %5 = zext i8 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+define dso_local i64 @lhu(ptr %a) nounwind {
+; RV64I-LABEL: lhu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lhu a1, 10(a0)
+; RV64I-NEXT:    lhu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 5
+  %2 = load i16, ptr %1
+  %3 = zext i16 %2 to i64
+  %4 = load volatile i16, ptr %a
+  %5 = zext i16 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+define dso_local i64 @lwu(ptr %a) nounwind {
+; RV64I-LABEL: lwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lwu a1, 24(a0)
+; RV64I-NEXT:    lwu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 6
+  %2 = load i32, ptr %1
+  %3 = zext i32 %2 to i64
+  %4 = load volatile i32, ptr %a
+  %5 = zext i32 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+; 64-bit loads and stores
+
+define dso_local i64 @ld(ptr %a) nounwind {
+; RV64I-LABEL: ld:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ld a1, 80(a0)
+; RV64I-NEXT:    ld zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i64, ptr %a, i32 10
+  %2 = load i64, ptr %1
+  %3 = load volatile i64, ptr %a
+  ret i64 %2
+}
+
+define dso_local void @sd(ptr %a, i64 %b) nounwind {
+; RV64I-LABEL: sd:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sd a1, 0(a0)
+; RV64I-NEXT:    sd a1, 88(a0)
+; RV64I-NEXT:    ret
+  store i64 %b, ptr %a
+  %1 = getelementptr i64, ptr %a, i32 11
+  store i64 %b, ptr %1
+  ret void
+}
+
+; Check load and store to an i1 location
+define dso_local i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
+; RV64I-LABEL: load_sext_zext_anyext_i1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 1(a0)
+; RV64I-NEXT:    lbu a2, 2(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    sub a0, a2, a1
+; RV64I-NEXT:    ret
+  ; sextload i1
+  %1 = getelementptr i1, ptr %a, i32 1
+  %2 = load i1, ptr %1
+  %3 = sext i1 %2 to i64
+  ; zextload i1
+  %4 = getelementptr i1, ptr %a, i32 2
+  %5 = load i1, ptr %4
+  %6 = zext i1 %5 to i64
+  %7 = add i64 %3, %6
+  ; extload i1 (anyext). Produced as the load is unused.
+  %8 = load volatile i1, ptr %a
+  ret i64 %7
+}
+
+define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
+; RV64I-LABEL: load_sext_zext_anyext_i1_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 1(a0)
+; RV64I-NEXT:    lbu a2, 2(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    subw a0, a2, a1
+; RV64I-NEXT:    ret
+  ; sextload i1
+  %1 = getelementptr i1, ptr %a, i32 1
+  %2 = load i1, ptr %1
+  %3 = sext i1 %2 to i16
+  ; zextload i1
+  %4 = getelementptr i1, ptr %a, i32 2
+  %5 = load i1, ptr %4
+  %6 = zext i1 %5 to i16
+  %7 = add i16 %3, %6
+  ; extload i1 (anyext). Produced as the load is unused.
+  %8 = load volatile i1, ptr %a
+  ret i16 %7
+}
+
+; Check load and store to a global
+@G = dso_local global i64 0
+
+define dso_local i64 @ld_sd_global(i64 %a) nounwind {
+; RV64I-LABEL: ld_sd_global:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, %hi(G)
+; RV64I-NEXT:    ld a1, %lo(G)(a2)
+; RV64I-NEXT:    addi a3, a2, %lo(G)
+; RV64I-NEXT:    sd a0, %lo(G)(a2)
+; RV64I-NEXT:    ld zero, 72(a3)
+; RV64I-NEXT:    sd a0, 72(a3)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = load volatile i64, ptr @G
+  store i64 %a, ptr @G
+  %2 = getelementptr i64, ptr @G, i64 9
+  %3 = load volatile i64, ptr %2
+  store i64 %a, ptr %2
+  ret i64 %1
+}
+
+define i64 @lw_near_local(ptr %a)  {
+; RV64I-LABEL: lw_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    ld a0, 9(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+define void @st_near_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    sd a1, 9(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  store i64 %b, ptr %1
+  ret void
+}
+
+define i64 @lw_sw_near_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a2, a0, 2047
+; RV64I-NEXT:    ld a0, 9(a2)
+; RV64I-NEXT:    sd a1, 9(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
+
+define i64 @lw_far_local(ptr %a)  {
+; RV64I-LABEL: lw_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, -8(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+define void @st_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 8
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    sd a1, -8(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  store i64 %b, ptr %1
+  ret void
+}
+
+define i64 @lw_sw_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 8
+; RV64I-NEXT:    add a2, a0, a2
+; RV64I-NEXT:    ld a0, -8(a2)
+; RV64I-NEXT:    sd a1, -8(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the load offset. The sign extend of the
+; addiw is required.
+define i64 @lw_really_far_local(ptr %a)  {
+; RV64I-LABEL: lw_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    addiw a1, a1, -2048
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the store offset. The sign extend of
+; the addiw is required.
+define void @st_really_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a2, a2, -2048
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    sd a1, 0(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  store i64 %b, ptr %1
+  ret void
+}
+
+; Make sure we don't fold the addiw into the load/store offset. The sign extend
+; of the addiw is required.
+define i64 @lw_sw_really_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a2, a2, -2048
+; RV64I-NEXT:    add a2, a0, a2
+; RV64I-NEXT:    ld a0, 0(a2)
+; RV64I-NEXT:    sd a1, 0(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
+
+%struct.quux = type { i32, [0 x i8] }
+
+; Make sure we don't remove the addi and fold the C from
+; (add (addi FrameIndex, C), X) into the store address.
+; FrameIndex cannot be the operand of an ADD. We must keep the ADDI.
+define void @addi_fold_crash(i64 %arg) nounwind {
+; RV64I-LABEL: addi_fold_crash:
+; RV64I:       # %bb.0: # %bb
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi a1, sp, 4
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    sb zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    call snork@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+bb:
+  %tmp = alloca %struct.quux, align 8
+  %tmp1 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1
+  %tmp2 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1, i64 %arg
+  store i8 0, ptr %tmp2, align 1
+  call void @snork(ptr %tmp1)
+  ret void
+}
+
+declare void @snork(ptr)

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
new file mode 100644
index 000000000000000..11adbbdd245f1d0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64IM %s
+
+define i32 @urem(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: urem:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @urem_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: urem_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i32 10, %a
+  ret i32 %1
+}
+
+define i32 @srem(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: srem:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srem_pow2(i32 %a) nounwind {
+; RV64I-LABEL: srem_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    andi a1, a1, -8
+; RV64IM-NEXT:    subw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @srem_pow2_2(i32 %a) nounwind {
+; RV64I-LABEL: srem_pow2_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_pow2_2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    lui a2, 1048560
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    subw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, 65536
+  ret i32 %1
+}
+
+define i32 @srem_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: srem_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i32 -10, %a
+  ret i32 %1
+}
+
+define i64 @urem64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: urem64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __umoddi3@plt
+;
+; RV64IM-LABEL: urem64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @urem64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: urem64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __umoddi3@plt
+;
+; RV64IM-LABEL: urem64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remu a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i64 10, %a
+  ret i64 %1
+}
+
+define i64 @srem64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: srem64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __moddi3@plt
+;
+; RV64IM-LABEL: srem64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    rem a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @srem64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: srem64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    tail __moddi3@plt
+;
+; RV64IM-LABEL: srem64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    rem a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i64 -10, %a
+  ret i64 %1
+}
+
+define i8 @urem8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: urem8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a1, a1, 255
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @urem8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: urem8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i8 10, %a
+  ret i8 %1
+}
+
+
+define i8 @srem8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: srem8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @srem8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: srem8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a1, a0, 24
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i8 -10, %a
+  ret i8 %1
+}
+
+
+define i16 @urem16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: urem16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a2, 16
+; RV64IM-NEXT:    addi a2, a2, -1
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @urem16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: urem16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a1, a0, 48
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i16 10, %a
+  ret i16 %1
+}
+
+define i16 @srem16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: srem16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @srem16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: srem16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a1, a0, 16
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i16 -10, %a
+  ret i16 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
new file mode 100644
index 000000000000000..bb2f2b73d4a0c7d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -0,0 +1,902 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    j .LBB1_3
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:  .LBB1_3: # %cond.end
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    li a1, 31
+; RV64XTHEADBB-NEXT:    subw a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  %2 = sub i32 31, %1
+  ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    li s0, 32
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    beqz a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a1, a0, 24
+; RV64I-NEXT:  .LBB2_2: # %cond.end
+; RV64I-NEXT:    subw a0, s0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_ceil_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    li a1, 32
+; RV64XTHEADBB-NEXT:    subw a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = sub i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %3 = sub i32 32, %2
+  ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    or a0, s0, a0
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    xori a0, a0, 31
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findLastSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
+; RV64XTHEADBB-NEXT:    addi a1, a1, -32
+; RV64XTHEADBB-NEXT:    xori a1, a1, 31
+; RV64XTHEADBB-NEXT:    snez a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    or a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+  %2 = xor i32 31, %1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 -1, i32 %2
+  ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    negw a1, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB6_2:
+; RV64XTHEADBB-NEXT:    li a0, 32
+; RV64XTHEADBB-NEXT:    ret
+; RV64ZBB-LABEL: cttz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    negw a1, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findFirstSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    mv s0, a0
+; RV64XTHEADBB-NEXT:    negw a0, a0
+; RV64XTHEADBB-NEXT:    and a0, s0, a0
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    snez a1, s0
+; RV64XTHEADBB-NEXT:    addi a1, a1, -1
+; RV64XTHEADBB-NEXT:    or a0, a1, a0
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = icmp eq i32 %a, 0
+  %3 = select i1 %2, i32 -1, i32 %1
+  ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    seqz a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ffs_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    mv s0, a0
+; RV64XTHEADBB-NEXT:    negw a0, a0
+; RV64XTHEADBB-NEXT:    and a0, s0, a0
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    addi a0, a0, 1
+; RV64XTHEADBB-NEXT:    seqz a1, s0
+; RV64XTHEADBB-NEXT:    addi a1, a1, -1
+; RV64XTHEADBB-NEXT:    and a0, a1, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = add i32 %1, 1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 0, i32 %2
+  ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB10_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 58
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB10_2:
+; RV64XTHEADBB-NEXT:    li a0, 64
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: no_sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 17
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 17
+; RV64XTHEADBB-NEXT:    sraiw a0, a0, 16
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 17
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: no_sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 49
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 49
+; RV64XTHEADBB-NEXT:    srai a0, a0, 48
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 49
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+define i64 @zext_bf_i64(i64 %a) nounwind {
+; RV64I-LABEL: zext_bf_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 47
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_bf_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 1
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
+define i64 @zext_i64_srliw(i64 %a) nounwind {
+; RV64I-LABEL: zext_i64_srliw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_i64_srliw:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 16
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slliw a2, a2, 8
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addi a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    and a3, a0, a3
+; RV64I-NEXT:    slli a3, a3, 8
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32_nosext:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %1, ptr %x
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    srli a5, a0, 8
+; RV64I-NEXT:    srliw a5, a5, 24
+; RV64I-NEXT:    slli a5, a5, 24
+; RV64I-NEXT:    or a3, a5, a3
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    and a4, a0, a4
+; RV64I-NEXT:    slli a4, a4, 24
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a3, a4, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 40
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.rev a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
new file mode 100644
index 000000000000000..8005ad60b8a1105
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
@@ -0,0 +1,1798 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB
+
+define i64 @slliuw(i64 %a) nounwind {
+; RV64I-LABEL: slliuw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 1
+; RV64ZBA-NEXT:    ret
+  %conv1 = shl i64 %a, 1
+  %shl = and i64 %conv1, 8589934590
+  ret i64 %shl
+}
+
+define i128 @slliuw_2(i32 signext %0, ptr %1) {
+; RV64I-LABEL: slliuw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 28
+; RV64I-NEXT:    add a1, a1, a0
+; RV64I-NEXT:    ld a0, 0(a1)
+; RV64I-NEXT:    ld a1, 8(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 4
+; RV64ZBA-NEXT:    add a1, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a1)
+; RV64ZBA-NEXT:    ld a1, 8(a1)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i128, ptr %1, i64 %3
+  %5 = load i128, ptr %4
+  ret i128 %5
+}
+
+define i64 @adduw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    add.uw a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %and = and i64 %b, 4294967295
+  %add = add i64 %and, %a
+  ret i64 %add
+}
+
+define signext i8 @adduw_2(i32 signext %0, ptr %1) {
+; RV64I-LABEL: adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lb a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lb a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i8, ptr %1, i64 %3
+  %5 = load i8, ptr %4
+  ret i8 %5
+}
+
+define i64 @zextw_i64(i64 %a) nounwind {
+; RV64I-LABEL: zextw_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zextw_i64:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  %and = and i64 %a, 4294967295
+  ret i64 %and
+}
+
+; This makes sure targetShrinkDemandedConstant changes the and immediate to
+; allow zext.w or slli+srli.
+define i64 @zextw_demandedbits_i64(i64 %0) {
+; RV64I-LABEL: zextw_demandedbits_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ori a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zextw_demandedbits_i64:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    ori a0, a0, 1
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  %2 = and i64 %0, 4294967294
+  %3 = or i64 %2, 1
+  ret i64 %3
+}
+
+define signext i16 @sh1add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i16, ptr %1, i64 %0
+  %4 = load i16, ptr %3
+  ret i16 %4
+}
+
+define signext i32 @sh2add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i32, ptr %1, i64 %0
+  %4 = load i32, ptr %3
+  ret i32 %4
+}
+
+define i64 @sh3add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i64, ptr %1, i64 %0
+  %4 = load i64, ptr %3
+  ret i64 %4
+}
+
+define signext i16 @sh1adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh1adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i16, ptr %1, i64 %3
+  %5 = load i16, ptr %4
+  ret i16 %5
+}
+
+define i64 @sh1adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh1adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 1
+  %4 = and i64 %3, 8589934590
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+define signext i32 @sh2adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh2adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i32, ptr %1, i64 %3
+  %5 = load i32, ptr %4
+  ret i32 %5
+}
+
+define i64 @sh2adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh2adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 2
+  %4 = and i64 %3, 17179869180
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+define i64 @sh3adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh3adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i64, ptr %1, i64 %3
+  %5 = load i64, ptr %4
+  ret i64 %5
+}
+
+define i64 @sh3adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh3adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 3
+  %4 = and i64 %3, 34359738360
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+; Type legalization inserts a sext_inreg after the first add. That add will be
+; selected as sh2add which does not sign extend. SimplifyDemandedBits is unable
+; to remove the sext_inreg because it has multiple uses. The ashr will use the
+; sext_inreg to become sraiw. This leaves the sext_inreg only used by the shl.
+; If the shl is selected as sllw, we don't need the sext_inreg.
+define i64 @sh2add_extra_sext(i32 %x, i32 %y, i32 %z) {
+; RV64I-LABEL: sh2add_extra_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    sllw a1, a2, a1
+; RV64I-NEXT:    sraiw a0, a0, 2
+; RV64I-NEXT:    mul a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2add_extra_sext:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a0, a0, 2
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    zext.w a1, a0
+; RV64ZBA-NEXT:    sllw a1, a2, a1
+; RV64ZBA-NEXT:    sraiw a0, a0, 2
+; RV64ZBA-NEXT:    mul a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %a = shl i32 %x, 2
+  %b = add i32 %a, %y
+  %c = shl i32 %z, %b
+  %d = ashr i32 %b, 2
+  %e = sext i32 %c to i64
+  %f = sext i32 %d to i64
+  %g = mul i64 %e, %f
+  ret i64 %g
+}
+
+define i64 @addmul6(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 6
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 6
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul10(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul10:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 10
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul10:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 10
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul12(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul12:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 12
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul12:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 12
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul18(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul18:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 18
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul18:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 18
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul20(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul20:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 20
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul20:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 20
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul24(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul24:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 24
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul24:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 24
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul36(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul36:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 36
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul36:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 36
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul40(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul40:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 40
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul40:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 40
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul72(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul72:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 72
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul72:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 72
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @mul96(i64 %a) {
+; RV64I-LABEL: mul96:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 96
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul96:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 96
+  ret i64 %c
+}
+
+define i64 @mul160(i64 %a) {
+; RV64I-LABEL: mul160:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 160
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul160:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 160
+  ret i64 %c
+}
+
+define i64 @mul288(i64 %a) {
+; RV64I-LABEL: mul288:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 288
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul288:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 288
+  ret i64 %c
+}
+
+define i64 @zext_mul96(i32 signext %a) {
+; RV64I-LABEL: zext_mul96:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul96:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 96
+  ret i64 %c
+}
+
+define i64 @zext_mul160(i32 signext %a) {
+; RV64I-LABEL: zext_mul160:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul160:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 160
+  ret i64 %c
+}
+
+define i64 @zext_mul288(i32 signext %a) {
+; RV64I-LABEL: zext_mul288:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 9
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul288:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 288
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul12884901888(i32 signext %a) {
+; RV64I-LABEL: zext_mul12884901888:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul12884901888:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 12884901888
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul21474836480(i32 signext %a) {
+; RV64I-LABEL: zext_mul21474836480:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul21474836480:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 21474836480
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul38654705664(i32 signext %a) {
+; RV64I-LABEL: zext_mul38654705664:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 9
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul38654705664:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 38654705664
+  ret i64 %c
+}
+
+define i64 @sh1add_imm(i64 %0) {
+; CHECK-LABEL: sh1add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, 5
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 1
+  %b = add i64 %a, 5
+  ret i64 %b
+}
+
+define i64 @sh2add_imm(i64 %0) {
+; CHECK-LABEL: sh2add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -6
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 2
+  %b = add i64 %a, -6
+  ret i64 %b
+}
+
+define i64 @sh3add_imm(i64 %0) {
+; CHECK-LABEL: sh3add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, 7
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 3
+  %b = add i64 %a, 7
+  ret i64 %b
+}
+
+define i64 @sh1adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh1adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    addi a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 1
+; RV64ZBA-NEXT:    addi a0, a0, 11
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 1
+  %c = add i64 %b, 11
+  ret i64 %c
+}
+
+define i64 @sh2adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh2adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    addi a0, a0, -12
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 2
+; RV64ZBA-NEXT:    addi a0, a0, -12
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 2
+  %c = add i64 %b, -12
+  ret i64 %c
+}
+
+define i64 @sh3adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh3adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    addi a0, a0, 13
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 3
+; RV64ZBA-NEXT:    addi a0, a0, 13
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 3
+  %c = add i64 %b, 13
+  ret i64 %c
+}
+
+define i64 @adduw_imm(i32 signext %0) nounwind {
+; RV64I-LABEL: adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 5
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    addi a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = add i64 %a, 5
+  ret i64 %b
+}
+
+define i64 @mul258(i64 %a) {
+; CHECK-LABEL: mul258:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 258
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 258
+  ret i64 %c
+}
+
+define i64 @mul260(i64 %a) {
+; CHECK-LABEL: mul260:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 260
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 260
+  ret i64 %c
+}
+
+define i64 @mul264(i64 %a) {
+; CHECK-LABEL: mul264:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 264
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 264
+  ret i64 %c
+}
+
+define i64 @imm_zextw() nounwind {
+; RV64I-LABEL: imm_zextw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, -2
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: imm_zextw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a0, -2
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  ret i64 4294967294 ; -2 in 32 bits.
+}
+
+define i64 @mul11(i64 %a) {
+; RV64I-LABEL: mul11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 11
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul11:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 11
+  ret i64 %c
+}
+
+define i64 @mul19(i64 %a) {
+; RV64I-LABEL: mul19:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 19
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul19:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 19
+  ret i64 %c
+}
+
+define i64 @mul13(i64 %a) {
+; RV64I-LABEL: mul13:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 13
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul13:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 13
+  ret i64 %c
+}
+
+define i64 @mul21(i64 %a) {
+; RV64I-LABEL: mul21:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 21
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul21:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 21
+  ret i64 %c
+}
+
+define i64 @mul37(i64 %a) {
+; RV64I-LABEL: mul37:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 37
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul37:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 37
+  ret i64 %c
+}
+
+define i64 @mul25(i64 %a) {
+; RV64I-LABEL: mul25:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 25
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul25:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 25
+  ret i64 %c
+}
+
+define i64 @mul41(i64 %a) {
+; RV64I-LABEL: mul41:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 41
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul41:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 41
+  ret i64 %c
+}
+
+define i64 @mul73(i64 %a) {
+; RV64I-LABEL: mul73:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 73
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul73:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 73
+  ret i64 %c
+}
+
+define i64 @mul27(i64 %a) {
+; RV64I-LABEL: mul27:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 27
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul27:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 27
+  ret i64 %c
+}
+
+define i64 @mul45(i64 %a) {
+; RV64I-LABEL: mul45:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 45
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul45:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 45
+  ret i64 %c
+}
+
+define i64 @mul81(i64 %a) {
+; RV64I-LABEL: mul81:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 81
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul81:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 81
+  ret i64 %c
+}
+
+define i64 @mul4098(i64 %a) {
+; RV64I-LABEL: mul4098:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4098:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4098
+  ret i64 %c
+}
+
+define i64 @mul4100(i64 %a) {
+; RV64I-LABEL: mul4100:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4100:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4100
+  ret i64 %c
+}
+
+define i64 @mul4104(i64 %a) {
+; RV64I-LABEL: mul4104:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 3
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4104:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4104
+  ret i64 %c
+}
+
+define signext i32 @mulw192(i32 signext %a) {
+; CHECK-LABEL: mulw192:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 192
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 192
+  ret i32 %c
+}
+
+define signext i32 @mulw320(i32 signext %a) {
+; CHECK-LABEL: mulw320:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 320
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 320
+  ret i32 %c
+}
+
+define signext i32 @mulw576(i32 signext %a) {
+; CHECK-LABEL: mulw576:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 576
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 576
+  ret i32 %c
+}
+
+define i64 @add4104(i64 %a) {
+; RV64I-LABEL: add4104:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: add4104:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a1, 1026
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = add i64 %a, 4104
+  ret i64 %c
+}
+
+define i64 @add8208(i64 %a) {
+; RV64I-LABEL: add8208:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 2
+; RV64I-NEXT:    addiw a1, a1, 16
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: add8208:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a1, 1026
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = add i64 %a, 8208
+  ret i64 %c
+}
+
+; Make sure we prefer LUI for the 8192 instead of using sh3add.
+define signext i32 @add8192_i32(i32 signext %a) {
+; CHECK-LABEL: add8192_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 2
+; CHECK-NEXT:    addw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = add i32 %a, 8192
+  ret i32 %c
+}
+
+; Make sure we prefer LUI for the 8192 instead of using sh3add.
+define i64 @add8192(i64 %a) {
+; CHECK-LABEL: add8192:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = add i64 %a, 8192
+  ret i64 %c
+}
+
+define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 6
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 1
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 6
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_6(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 6
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 6
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 7
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_7:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 2
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 7
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_7(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_7:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 7
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_8:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 3
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 8
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_8(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_8:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 8
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+; Make sure we use sext.b for Zba+Zbb.
+; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
+define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
+; RV64I-LABEL: sext_ashr_zext_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 31
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
+; RV64ZBANOZBB:       # %bb.0:
+; RV64ZBANOZBB-NEXT:    slli a0, a0, 24
+; RV64ZBANOZBB-NEXT:    sraiw a0, a0, 31
+; RV64ZBANOZBB-NEXT:    zext.w a0, a0
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
+; RV64ZBAZBB:       # %bb.0:
+; RV64ZBAZBB-NEXT:    sext.b a0, a0
+; RV64ZBAZBB-NEXT:    sraiw a0, a0, 9
+; RV64ZBAZBB-NEXT:    zext.w a0, a0
+; RV64ZBAZBB-NEXT:    ret
+  %ext = sext i8 %a to i32
+  %1 = ashr i32 %ext, 9
+  ret i32 %1
+}
+
+; Make sure we use sext.h+slli+srli for Zba+Zbb.
+; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
+define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
+; RV64I-LABEL: sext_ashr_zext_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 25
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
+; RV64ZBANOZBB:       # %bb.0:
+; RV64ZBANOZBB-NEXT:    slli a0, a0, 16
+; RV64ZBANOZBB-NEXT:    sraiw a0, a0, 25
+; RV64ZBANOZBB-NEXT:    zext.w a0, a0
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
+; RV64ZBAZBB:       # %bb.0:
+; RV64ZBAZBB-NEXT:    slli a0, a0, 48
+; RV64ZBAZBB-NEXT:    srai a0, a0, 57
+; RV64ZBAZBB-NEXT:    zext.w a0, a0
+; RV64ZBAZBB-NEXT:    ret
+  %ext = sext i16 %a to i32
+  %1 = ashr i32 %ext, 9
+  ret i32 %1
+}
+
+; This is the IR you get from InstCombine if you take the difference of 2 pointers
+; and cast it to unsigned before using it as an index.
+define signext i16 @sh1adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh1adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 33
+; RV64I-NEXT:    addi a2, a2, -2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 1
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 1
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i16, ptr %baseptr, i64 %cast
+  %res = load i16, ptr %ptr
+  ret i16 %res
+}
+
+define signext i32 @sh2adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh2adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 34
+; RV64I-NEXT:    addi a2, a2, -4
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 2
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 2
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i32, ptr %baseptr, i64 %cast
+  %res = load i32, ptr %ptr
+  ret i32 %res
+}
+
+define i64 @sh3adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh3adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 35
+; RV64I-NEXT:    addi a2, a2, -8
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 3
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 3
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i64, ptr %baseptr, i64 %cast
+  %res = load i64, ptr %ptr
+  ret i64 %res
+}
+
+define signext i16 @srliw_1_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+define i128 @slliuw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: slliuw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 36
+; RV64I-NEXT:    addi a2, a2, -16
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a1, a1, a0
+; RV64I-NEXT:    ld a0, 0(a1)
+; RV64I-NEXT:    ld a1, 8(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 4
+; RV64ZBA-NEXT:    slli.uw a0, a0, 4
+; RV64ZBA-NEXT:    add a1, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a1)
+; RV64ZBA-NEXT:    ld a1, 8(a1)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 4
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i128, ptr %baseptr, i64 %cast
+  %res = load i128, ptr %ptr
+  ret i128 %res
+}
+
+define signext i32 @srliw_2_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_3_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 3
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_3_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 3
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 3
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i32 @srliw_1_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_1_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define i64 @srliw_2_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i16 @srliw_2_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+
+define signext i32 @srliw_3_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_3_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 3
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_3_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 3
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 3
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_4_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_4_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 4
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_4_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 4
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 4
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i32 @srli_1_sh2add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 1
+  %4 = getelementptr inbounds i32, ptr %0, i64 %3
+  %5 = load i32, ptr %4, align 4
+  ret i32 %5
+}
+
+define i64 @srli_2_sh3add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 2
+  %4 = getelementptr inbounds i64, ptr %0, i64 %3
+  %5 = load i64, ptr %4, align 8
+  ret i64 %5
+}
+
+define signext i16 @srli_2_sh1add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 2
+  %4 = getelementptr inbounds i16, ptr %0, i64 %3
+  %5 = load i16, ptr %4, align 2
+  ret i16 %5
+}
+
+define signext i32 @srli_3_sh2add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_3_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_3_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 3
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 3
+  %4 = getelementptr inbounds i32, ptr %0, i64 %3
+  %5 = load i32, ptr %4, align 4
+  ret i32 %5
+}
+
+define i64 @srli_4_sh3add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_4_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_4_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 4
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 4
+  %4 = getelementptr inbounds i64, ptr %0, i64 %3
+  %5 = load i64, ptr %4, align 8
+  ret i64 %5
+}
+
+define signext i16 @shl_2_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+define signext i32 @shl_16_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_16_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_16_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 16
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 16
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @shl_31_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_31_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_31_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 31
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 31
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
new file mode 100644
index 000000000000000..1ab37493b0ec61e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBB
+
+declare i32 @llvm.riscv.orc.b.i32(i32)
+
+define signext i32 @orcb32(i32 signext %a) nounwind {
+; RV64ZBB-LABEL: orcb32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
+  ret i32 %tmp
+}
+
+define zeroext i32 @orcb32_zext(i32 zeroext %a) nounwind {
+; RV64ZBB-LABEL: orcb32_zext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
+  ret i32 %tmp
+}
+
+; The second and+or is redundant with the first; make sure we remove them.
+define signext i32 @orcb32_knownbits(i32 signext %a) nounwind {
+; RV64ZBB-LABEL: orcb32_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 1044480
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 2048
+; RV64ZBB-NEXT:    addi a1, a1, 1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = and i32 %a, 4278190080 ; 0xFF000000
+  %tmp2 = or i32 %tmp, 8388609 ; 0x800001
+  %tmp3 = call i32 @llvm.riscv.orc.b.i32(i32 %tmp2)
+  %tmp4 = and i32 %tmp3, 4278190080 ; 0xFF000000
+  %tmp5 = or i32 %tmp4, 16711935 ; 0xFF00FF
+  ret i32 %tmp5
+}
+
+declare i64 @llvm.riscv.orc.b.i64(i64)
+
+define i64 @orcb64(i64 %a) nounwind {
+; RV64ZBB-LABEL: orcb64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.orc.b.i64(i64 %a)
+  ret i64 %tmp
+}
+
+; The second and+or is redundant with the first; make sure we remove them.
+define i64 @orcb64_knownbits(i64 %a) nounwind {
+; RV64ZBB-LABEL: orcb64_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 65535
+; RV64ZBB-NEXT:    slli a1, a1, 12
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 256
+; RV64ZBB-NEXT:    addiw a1, a1, 8
+; RV64ZBB-NEXT:    slli a2, a1, 42
+; RV64ZBB-NEXT:    add a1, a1, a2
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = and i64 %a, 1099494850560 ; 0x000000ffff000000
+  %tmp2 = or i64 %tmp, 4611721202800525320 ; 0x4000200000100008
+  %tmp3 = call i64 @llvm.riscv.orc.b.i64(i64 %tmp2)
+  %tmp4 = and i64 %tmp3, 1099494850560 ; 0x000000ffff000000
+  %tmp5 = or i64 %tmp4, 18374966855153418495 ; 0xff00ff0000ff00ff
+  ret i64 %tmp5
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
new file mode 100644
index 000000000000000..e6e9829c16f22b8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
@@ -0,0 +1,600 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBKB
+
+define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
+define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: orn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: orn_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %or = or i32 %neg, %a
+  ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: orn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: orn_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %or = or i64 %neg, %a
+  ret i64 %or
+}
+
+define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: xnor_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: xnor_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %xor = xor i32 %neg, %b
+  ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: xnor_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: xnor_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %neg, %b
+  ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: rol_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a2, a1, -1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    negw a2, a2
+; RV64I-NEXT:    srlw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+; Similar to rol_i32, but doesn't sign extend the result.
+define void @rol_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
+; RV64I-LABEL: rol_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a1, -1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    negw a3, a3
+; RV64I-NEXT:    srlw a0, a0, a3
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sw a0, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
+; RV64I-LABEL: rol_i32_neg_constant_rhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a1, a0, -1
+; RV64I-NEXT:    li a2, -2
+; RV64I-NEXT:    sllw a0, a2, a0
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    srlw a1, a2, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32_neg_constant_rhs:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 -2, i32 -2, i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: rol_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sll a2, a0, a1
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rol a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: ror_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a2, a1, -1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    negw a2, a2
+; RV64I-NEXT:    sllw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+; Similar to ror_i32, but doesn't sign extend the result.
+define void @ror_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
+; RV64I-LABEL: ror_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a1, -1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    negw a3, a3
+; RV64I-NEXT:    sllw a0, a0, a3
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sw a0, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
+; RV64I-LABEL: ror_i32_neg_constant_rhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a1, a0, -1
+; RV64I-NEXT:    li a2, -2
+; RV64I-NEXT:    srlw a0, a2, a0
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    sllw a1, a2, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32_neg_constant_rhs:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 -2, i32 -2, i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: ror_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a2, a0, a1
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    sll a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    ror a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32_fshl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    slliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshl:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+; Similar to rori_i32_fshl, but doesn't sign extend the result.
+define void @rori_i32_fshl_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: rori_i32_fshl_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshl_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @rori_i32_fshr(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32_fshr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slliw a1, a0, 1
+; RV64I-NEXT:    srliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshr:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+; Similar to rori_i32_fshr, but doesn't sign extend the result.
+define void @rori_i32_fshr_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: rori_i32_fshr_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    srliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshr_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  store i32 %1, ptr %x
+  ret void
+}
+
+; This test is similar to the type legalized version of the fshl/fshr tests, but
+; instead of having the same input to both shifts it has different inputs. Make
+; sure we don't match it as a roriw.
+define signext i32 @not_rori_i32(i32 signext %x, i32 signext %y) nounwind {
+; CHECK-LABEL: not_rori_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slliw a0, a0, 31
+; CHECK-NEXT:    srliw a1, a1, 1
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    ret
+  %a = shl i32 %x, 31
+  %b = lshr i32 %y, 1
+  %c = or i32 %a, %b
+  ret i32 %c
+}
+
+; This is similar to the type legalized roriw pattern, but the and mask is more
+; than 32 bits so the lshr doesn't shift zeroes into the lower 32 bits. Make
+; sure we don't match it to roriw.
+define i64 @roriw_bug(i64 %x) nounwind {
+; CHECK-LABEL: roriw_bug:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a0, 31
+; CHECK-NEXT:    andi a2, a0, -2
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    xor a0, a2, a0
+; CHECK-NEXT:    ret
+  %a = shl i64 %x, 31
+  %b = and i64 %x, 18446744073709551614
+  %c = lshr i64 %b, 1
+  %d = or i64 %a, %c
+  %e = shl i64 %d, 32
+  %f = ashr i64 %e, 32
+  %g = xor i64 %b, %f ; to increase the use count on %b to disable SimplifyDemandedBits.
+  ret i64 %g
+}
+
+define i64 @rori_i64_fshl(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64_fshl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i64_fshl:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rori a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define i64 @rori_i64_fshr(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64_fshr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i64_fshr:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rori a0, a0, 63
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define signext i32 @not_shl_one_i32(i32 signext %x) {
+; RV64I-LABEL: not_shl_one_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sllw a0, a1, a0
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: not_shl_one_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = shl i32 1, %x
+  %2 = xor i32 %1, -1
+  ret i32 %2
+}
+
+define i64 @not_shl_one_i64(i64 %x) {
+; RV64I-LABEL: not_shl_one_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sll a0, a1, a0
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: not_shl_one_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rol a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = shl i64 1, %x
+  %2 = xor i64 %1, -1
+  ret i64 %2
+}
+
+define i8 @srli_i8(i8 %a) nounwind {
+; CHECK-LABEL: srli_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 192
+; CHECK-NEXT:    srliw a0, a0, 6
+; CHECK-NEXT:    ret
+  %1 = lshr i8 %a, 6
+  ret i8 %1
+}
+
+; We could use sext.b+srai, but slli+srai offers more opportunities for
+; compressed instructions.
+define i8 @srai_i8(i8 %a) nounwind {
+; RV64I-LABEL: srai_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 29
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srai_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srai a0, a0, 61
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srai_i8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 24
+; RV64ZBKB-NEXT:    sraiw a0, a0, 29
+; RV64ZBKB-NEXT:    ret
+  %1 = ashr i8 %a, 5
+  ret i8 %1
+}
+
+; We could use zext.h+srli, but slli+srli offers more opportunities for
+; compressed instructions.
+define i16 @srli_i16(i16 %a) nounwind {
+; RV64I-LABEL: srli_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    srliw a0, a0, 6
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srli_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    srliw a0, a0, 6
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srli_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    srliw a0, a0, 6
+; RV64ZBKB-NEXT:    ret
+  %1 = lshr i16 %a, 6
+  ret i16 %1
+}
+
+; We could use sext.h+srai, but slli+srai offers more opportunities for
+; compressed instructions.
+define i16 @srai_i16(i16 %a) nounwind {
+; RV64I-LABEL: srai_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 25
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srai_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    slli a0, a0, 48
+; RV64ZBB-NEXT:    srai a0, a0, 57
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srai_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 16
+; RV64ZBKB-NEXT:    sraiw a0, a0, 25
+; RV64ZBKB-NEXT:    ret
+  %1 = ashr i16 %a, 9
+  ret i16 %1
+}
+
+define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_seqz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_seqz_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp eq i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_seqz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_seqz_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp eq i64 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_snez_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_snez_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp ne i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_snez_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_snez_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp ne i64 %and, %b
+  ret i1 %cmpeq
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
new file mode 100644
index 000000000000000..acc175186b85863
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
@@ -0,0 +1,1068 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
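+; The cond.false block above is the usual libcall-free ctlz expansion: the
+; or/srliw cascade smears the leading one bit downward, the result is
+; inverted, and the remaining ones are counted with the classic SWAR popcount
+; masks (0x55555555, 0x33333333, 0x0f0f0f0f) and a 0x01010101 multiply. With
+; no M extension enabled, that multiply becomes a __muldi3 libcall, which is
+; why ra is spilled.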
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    j .LBB1_3
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:  .LBB1_3: # %cond.end
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: log2_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    li a1, 31
+; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  %2 = sub i32 31, %1
+  ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    li s0, 32
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    beqz a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a1, a0, 24
+; RV64I-NEXT:  .LBB2_2: # %cond.end
+; RV64I-NEXT:    subw a0, s0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: log2_ceil_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    li a1, 32
+; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %1 = sub i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %3 = sub i32 32, %2
+  ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    or a0, s0, a0
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    xori a0, a0, 31
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: findLastSet_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a1, a0
+; RV64ZBB-NEXT:    xori a1, a1, 31
+; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    addiw a0, a0, -1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+  %2 = xor i32 31, %1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 -1, i32 %2
+  ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_lshr_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srliw a0, a0, 1
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = lshr i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clz a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
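+; In the RV64I output above, (a & -a) isolates the lowest set bit; multiplying
+; by 0x077cb531 (lui 30667 + addiw 1329), shifting right by 27, and indexing
+; the .LCPI byte table is the standard 32-bit de Bruijn cttz lookup. The
+; multiply goes through __muldi3 because M is not enabled.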
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_zero_undef_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: findFirstSet_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a1, a0
+; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    addiw a0, a0, -1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = icmp eq i32 %a, 0
+  %3 = select i1 %2, i32 -1, i32 %1
+  ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    seqz a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ffs_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a1, a0
+; RV64ZBB-NEXT:    addi a1, a1, 1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = add i32 %1, 1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 0, i32 %2
+  ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB10_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 58
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define signext i32 @ctpop_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctpop_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %1
+}
+
+define signext i32 @ctpop_i32_load(ptr %p) nounwind {
+; RV64I-LABEL: ctpop_i32_load:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i32_load:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lw a0, 0(a0)
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    ret
+  %a = load i32, ptr %p
+  %1 = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctpop_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctpop_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sextb_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sextb_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sexth_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sexth_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: min_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB18_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB18_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: min_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp slt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @min_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: min_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB19_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: min_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp slt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: max_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB20_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB20_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: max_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @max_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: max_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB21_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB21_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: max_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: minu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB22_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB22_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: minu_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ult i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @minu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: minu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB23_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB23_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: minu_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ult i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: maxu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB24_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB24_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: maxu_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: maxu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB25_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB25_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: maxu_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+declare i32 @llvm.abs.i32(i32, i1 immarg)
+
+define i32 @abs_i32(i32 %x) {
+; RV64I-LABEL: abs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    xor a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
+  ret i32 %abs
+}
+
+define signext i32 @abs_i32_sext(i32 signext %x) {
+; RV64I-LABEL: abs_i32_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i32_sext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    xor a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
+  ret i32 %abs
+}
+
+declare i64 @llvm.abs.i64(i64, i1 immarg)
+
+define i64 @abs_i64(i64 %x) {
+; RV64I-LABEL: abs_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
+  ret i64 %abs
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: zexth_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: zexth_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slliw a2, a2, 8
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srai a0, a0, 32
+; RV64ZBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addi a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    and a3, a0, a3
+; RV64I-NEXT:    slli a3, a3, 8
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i32_nosext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %1, ptr %x
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    srli a5, a0, 8
+; RV64I-NEXT:    srliw a5, a5, 24
+; RV64I-NEXT:    slli a5, a5, 24
+; RV64I-NEXT:    or a3, a5, a3
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    and a4, a0, a4
+; RV64I-NEXT:    slli a4, a4, 24
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a3, a4, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 40
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
new file mode 100644
index 000000000000000..9b37e8729576ff5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC
+
+declare i64 @llvm.riscv.clmulr.i64(i64 %a, i64 %b)
+
+define i64 @clmul64r(i64 %a, i64 %b) nounwind {
+; RV64ZBC-LABEL: clmul64r:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmulr.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32r(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-LABEL: clmul32r:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    slli a1, a1, 32
+; RV64ZBC-NEXT:    slli a0, a0, 32
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
+; RV64ZBC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+; FIXME: We could avoid the slli instructions by using clmul+srli+sext.w since
+; the inputs are zero extended.
+define signext i32 @clmul32r_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-LABEL: clmul32r_zext:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    slli a1, a1, 32
+; RV64ZBC-NEXT:    slli a0, a0, 32
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
+; RV64ZBC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
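+; A rough sketch of what the FIXME above has in mind (illustrative only; the
+; exact shift amount is our assumption and is not generated or checked here):
+;   clmul  a0, a0, a1
+;   srli   a0, a0, 31
+;   sext.w a0, a0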

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
new file mode 100644
index 000000000000000..e0c9740a9c4bb29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+; RUN: llc -mtriple=riscv64 -mattr=+zbkc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+
+declare i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+
+define i64 @clmul64(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+
+define i64 @clmul64h(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64h:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    sext.w a0, a0
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32h(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32h:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
+; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+; FIXME: We could avoid the slli instructions by using clmul+srai since the
+; inputs are zero extended.
+define signext i32 @clmul32h_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32h_zext:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
+; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
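+; A rough sketch of the sequence the FIXME above suggests (illustrative only,
+; not generated or checked here): clmul a0, a0, a1 followed by srai a0, a0, 32,
+; relying on the zero-extended inputs keeping the carry-less product below
+; bit 63.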

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
new file mode 100644
index 000000000000000..3169f65f646718b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBKB
+
+declare i64 @llvm.riscv.brev8.i64(i64)
+
+define i64 @brev8(i64 %a) nounwind {
+; RV64ZBKB-LABEL: brev8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %val = call i64 @llvm.riscv.brev8.i64(i64 %a)
+  ret i64 %val
+}
+
+; Test that brev8 is recognized as preserving zero extension.
+define zeroext i16 @brev8_knownbits(i16 zeroext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_knownbits:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %zext = zext i16 %a to i64
+  %val = call i64 @llvm.riscv.brev8.i64(i64 %zext)
+  %trunc = trunc i64 %val to i16
+  ret i16 %trunc
+}
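+; brev8 only reverses the bit order within each byte, so bytes that are zero
+; stay zero; with a zero-extended i16 input the upper bytes remain clear and
+; no re-extension is needed, which is what the check lines above verify.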
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @rev8_i64(i64 %a) {
+; RV64ZBKB-LABEL: rev8_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
+
+declare i32 @llvm.riscv.brev8.i32(i32)
+
+define signext i32 @brev8_i32(i32 signext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    sext.w a0, a0
+; RV64ZBKB-NEXT:    ret
+  %val = call i32 @llvm.riscv.brev8.i32(i32 %a)
+  ret i32 %val
+}
+
+; Test that brev8 is recognized as preserving zero extension.
+define zeroext i16 @brev8_i32_knownbits(i16 zeroext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_i32_knownbits:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %zext = zext i16 %a to i32
+  %val = call i32 @llvm.riscv.brev8.i32(i32 %zext)
+  %trunc = trunc i32 %val to i16
+  ret i16 %trunc
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @rev8_i32(i32 signext %a) {
+; RV64ZBKB-LABEL: rev8_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srai a0, a0, 32
+; RV64ZBKB-NEXT:    ret
+  %1 = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
new file mode 100644
index 000000000000000..c4680a5e15120f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
@@ -0,0 +1,1000 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbs -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBS
+
+define signext i32 @bclr_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define signext i32 @bclr_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define signext i32 @bclr_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bclr_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclr_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclr a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %b, 63
+  %shl = shl nuw i64 1, %and
+  %neg = xor i64 %shl, -1
+  %and1 = and i64 %neg, %a
+  ret i64 %and1
+}
+
+define i64 @bclr_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bclr_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclr_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclr a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %b
+  %neg = xor i64 %shl, -1
+  %and1 = and i64 %neg, %a
+  ret i64 %and1
+}
+
+define signext i32 @bset_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+define signext i32 @bset_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+define signext i32 @bset_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+; We could use bset for 1 << x by setting the first source to zero.
+define signext i32 @bset_i32_zero(i32 signext %a) nounwind {
+; CHECK-LABEL: bset_i32_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    sllw a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %a
+  ret i32 %shl
+}
+
+define i64 @bset_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bset_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shl = shl nuw i64 1, %conv
+  %or = or i64 %shl, %a
+  ret i64 %or
+}
+
+define i64 @bset_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bset_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %b
+  %or = or i64 %shl, %a
+  ret i64 %or
+}
+
+; We can use bset for 1 << x by setting the first source to zero.
+define signext i64 @bset_i64_zero(i64 signext %a) nounwind {
+; RV64I-LABEL: bset_i64_zero:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sll a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64_zero:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, zero, a0
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %a
+  ret i64 %shl
+}
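+; bset computes rs1 | (1 << (rs2 & (XLEN-1))), so using x0 as the first source
+; yields 1 << x in a single instruction, replacing the li 1 + sll pair.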
+
+define signext i32 @binv_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define signext i32 @binv_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define signext i32 @binv_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define i64 @binv_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: binv_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    xor a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binv_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binv a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shl = shl nuw i64 1, %conv
+  %xor = xor i64 %shl, %a
+  ret i64 %xor
+}
+
+define i64 @binv_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: binv_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    xor a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binv_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binv a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl nuw i64 1, %b
+  %xor = xor i64 %shl, %a
+  ret i64 %xor
+}
+
+define signext i32 @bext_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: bext_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i32:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    andi a1, a1, 31
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %b, 31
+  %shr = lshr i32 %a, %and
+  %and1 = and i32 %shr, 1
+  ret i32 %and1
+}
+
+define signext i32 @bext_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: bext_i32_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i32_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i32 %a, %b
+  %and1 = and i32 %shr, 1
+  ret i32 %and1
+}
+
+; This gets converted to (i1 (truncate (srl X, Y))) earlier. Make sure we are
+; able to use bext.
+define void @bext_i32_trunc(i32 signext %0, i32 signext %1) {
+; RV64I-LABEL: bext_i32_trunc:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB19_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    tail bar@plt
+;
+; RV64ZBS-LABEL: bext_i32_trunc:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    beqz a0, .LBB19_2
+; RV64ZBS-NEXT:  # %bb.1:
+; RV64ZBS-NEXT:    ret
+; RV64ZBS-NEXT:  .LBB19_2:
+; RV64ZBS-NEXT:    tail bar@plt
+  %3 = shl i32 1, %1
+  %4 = and i32 %3, %0
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %6, label %7
+
+6:                                                ; preds = %2
+  tail call void @bar()
+  br label %7
+
+7:                                                ; preds = %6, %2
+  ret void
+}
+
+declare void @bar()
+
+define i64 @bext_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bext_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shr = lshr i64 %a, %conv
+  %and1 = and i64 %shr, 1
+  ret i64 %and1
+}
+
+define i64 @bext_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bext_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i64 %a, %b
+  %and1 = and i64 %shr, 1
+  ret i64 %and1
+}
+
+define signext i32 @bexti_i32(i32 signext %a) nounwind {
+; CHECK-LABEL: bexti_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a0, a0, 5
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %shr = lshr i32 %a, 5
+  %and = and i32 %shr, 1
+  ret i32 %and
+}
+
+define i64 @bexti_i64(i64 %a) nounwind {
+; RV64I-LABEL: bexti_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i64 %a, 5
+  %and = and i64 %shr, 1
+  ret i64 %and
+}
+
+define signext i32 @bexti_i32_cmp(i32 signext %a) nounwind {
+; RV64I-LABEL: bexti_i32_cmp:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i32_cmp:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, 32
+  %cmp = icmp ne i32 %and, 0
+  %zext = zext i1 %cmp to i32
+  ret i32 %zext
+}
+
+define i64 @bexti_i64_cmp(i64 %a) nounwind {
+; RV64I-LABEL: bexti_i64_cmp:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i64_cmp:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, 32
+  %cmp = icmp ne i64 %and, 0
+  %zext = zext i1 %cmp to i64
+  ret i64 %zext
+}
+
+define signext i32 @bclri_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: bclri_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, -1025
+; CHECK-NEXT:    ret
+  %and = and i32 %a, -1025
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: bclri_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048575
+; RV64I-NEXT:    addiw a1, a1, 2047
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, -2049
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: bclri_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 786432
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, -1073741825
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: bclri_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 33
+; CHECK-NEXT:    srli a0, a0, 33
+; CHECK-NEXT:    ret
+  %and = and i32 %a, -2147483649
+  ret i32 %and
+}
+
+define i64 @bclri_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: bclri_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, -1025
+; CHECK-NEXT:    ret
+  %and = and i64 %a, -1025
+  ret i64 %and
+}
+
+define i64 @bclri_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048575
+; RV64I-NEXT:    addiw a1, a1, 2047
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -2049
+  ret i64 %and
+}
+
+define i64 @bclri_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 786432
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -1073741825
+  ret i64 %and
+}
+
+define i64 @bclri_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -2147483649
+  ret i64 %and
+}
+
+define i64 @bclri_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -4611686018427387905
+  ret i64 %and
+}
+
+define i64 @bclri_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -9223372036854775809
+  ret i64 %and
+}
+
+define i64 @bclri_i64_large0(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_large0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1044480
+; RV64I-NEXT:    addiw a1, a1, -256
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_large0:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    andi a0, a0, -256
+; RV64ZBS-NEXT:    bclri a0, a0, 24
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -16777472
+  ret i64 %and
+}
+
+define i64 @bclri_i64_large1(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_large1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1044464
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_large1:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 16
+; RV64ZBS-NEXT:    bclri a0, a0, 24
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -16842753
+  ret i64 %and
+}
+
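The and-masks in the bclri tests above each clear a single bit. As a quick cross-check of the constants involved (a minimal C++ sketch, not part of the patch; clear_bit is an invented helper), clearing bit k means AND-ing with ~(1 << k); for k <= 10 the mask still fits RISC-V's signed 12-bit immediate range of [-2048, 2047], so plain andi is used, while bit 11 and above need bclri or a lui/addiw pair:

  #include <cstdint>

  // Mask that clears bit k of a 64-bit value.
  constexpr int64_t clear_bit(unsigned k) {
    return (int64_t)~(uint64_t{1} << k);
  }

  static_assert(clear_bit(10) == -1025, "still fits the 12-bit andi immediate");
  static_assert(clear_bit(11) == -2049, "out of andi range -> bclri");
  static_assert(clear_bit(31) == -2147483649LL, "the bclri_i64_31 constant");
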
+define signext i32 @bseti_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: bseti_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %or = or i32 %a, 1024
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: bseti_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %or = or i32 %a, 2048
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: bseti_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %or = or i32 %a, 1073741824
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: bseti_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    ret
+  %or = or i32 %a, 2147483648
+  ret i32 %or
+}
+
+define i64 @bseti_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: bseti_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %or = or i64 %a, 1024
+  ret i64 %or
+}
+
+define i64 @bseti_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 2048
+  ret i64 %or
+}
+
+define i64 @bseti_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 1073741824
+  ret i64 %or
+}
+
+define i64 @bseti_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 2147483648
+  ret i64 %or
+}
+
+define i64 @bseti_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4611686018427387904
+  ret i64 %or
+}
+
+define i64 @bseti_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 9223372036854775808
+  ret i64 %or
+}
+
+define signext i32 @binvi_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: binvi_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %xor = xor i32 %a, 1024
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: binvi_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %xor = xor i32 %a, 2048
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: binvi_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %xor = xor i32 %a, 1073741824
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: binvi_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    ret
+  %xor = xor i32 %a, 2147483648
+  ret i32 %xor
+}
+
+define i64 @binvi_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: binvi_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %xor = xor i64 %a, 1024
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 2048
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 1073741824
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 2147483648
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4611686018427387904
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 9223372036854775808
+  ret i64 %xor
+}
+
+define i64 @xor_i64_large(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_large:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    addi a1, a1, 1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_large:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 0
+; RV64ZBS-NEXT:    binvi a0, a0, 32
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4294967297
+  ret i64 %xor
+}
+
+define i64 @xor_i64_4099(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_4099:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 3
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_4099:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    xori a0, a0, 3
+; RV64ZBS-NEXT:    binvi a0, a0, 12
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4099
+  ret i64 %xor
+}
+
+define i64 @xor_i64_96(i64 %a) nounwind {
+; CHECK-LABEL: xor_i64_96:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 96
+; CHECK-NEXT:    ret
+  %xor = xor i64 %a, 96
+  ret i64 %xor
+}
+
+define i64 @or_i64_large(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_large:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    addi a1, a1, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_large:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 0
+; RV64ZBS-NEXT:    bseti a0, a0, 32
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4294967297
+  ret i64 %or
+}
+
+define i64 @xor_i64_66901(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_66901:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_66901:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    xori a0, a0, 1365
+; RV64ZBS-NEXT:    binvi a0, a0, 16
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 66901
+  ret i64 %xor
+}
+
+define i64 @or_i64_4099(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_4099:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_4099:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    ori a0, a0, 3
+; RV64ZBS-NEXT:    bseti a0, a0, 12
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4099
+  ret i64 %or
+}
+
+define i64 @or_i64_96(i64 %a) nounwind {
+; CHECK-LABEL: or_i64_96:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 96
+; CHECK-NEXT:    ret
+  %or = or i64 %a, 96
+  ret i64 %or
+}
+
+define i64 @or_i64_66901(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_66901:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_66901:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    ori a0, a0, 1365
+; RV64ZBS-NEXT:    bseti a0, a0, 16
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 66901
+  ret i64 %or
+}
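
The xor/or tests at the end of this file cover constants with more than one interesting bit: the small positive part that fits the 12-bit immediate goes to a single xori/ori and every remaining set bit gets its own binvi/bseti. A small C++ decomposition check (illustrative only; low12 and bit are invented helpers, not backend code):

  #include <cstdint>

  constexpr uint64_t low12(uint64_t c) { return c & 0x7ff; } // positive ori/xori part
  constexpr uint64_t bit(unsigned k)   { return uint64_t{1} << k; }

  static_assert(4099 == (low12(4099) | bit(12)), "ori 3; bseti 12");
  static_assert(66901 == (low12(66901) | bit(16)), "ori 1365; bseti 16");
  static_assert(4294967297 == (bit(0) | bit(32)), "bseti 0; bseti 32");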

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
new file mode 100644
index 000000000000000..774d1398644b984
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -0,0 +1,1308 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64
+
+;
+; Get the actual value of the overflow bit.
+;
+define zeroext i1 @saddo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: saddo1.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a3, a0, a1
+; RV64-NEXT:    slt a0, a3, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    sw a3, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
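The addw/slt/slti/xor sequence above is the usual branch-free signed-overflow test for a wrapping add: overflow occurred iff (b < 0) != (sum < a). A minimal C++ restatement of that identity (assumed for illustration; saddo_i32 is an invented name, not the backend's code):

  #include <cstdint>

  bool saddo_i32(int32_t a, int32_t b, int32_t *sum) {
    int32_t s = (int32_t)((uint32_t)a + (uint32_t)b); // wrapping add (addw)
    *sum = s;
    return (b < 0) != (s < a);                        // slti + slt + xor
  }

For reference, this matches what __builtin_add_overflow(a, b, &s) reports.
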
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo3.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, -4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo4.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a2, 4096
+; RV64-NEXT:    addi a2, a2, -1
+; RV64-NEXT:    addw a2, a0, a2
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
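saddo4.i32 uses a constant outside the signed 12-bit addi range of [-2048, 2047], so 16777215 (0xFFFFFF) is first materialized with lui 4096; addi -1 and only then added with addw. The decomposition, checked in C++ (illustrative sketch; lui_addi is an invented helper):

  #include <cstdint>

  // lui writes imm20 << 12; addi then adds a sign-extended 12-bit value.
  constexpr int32_t lui_addi(int32_t imm20, int32_t imm12) {
    return (imm20 << 12) + imm12;
  }

  static_assert(lui_addi(4096, -1) == 16777215, "0xFFFFFF = (4096 << 12) - 1");
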
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: saddo1.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a3, a0, a1
+; RV64-NEXT:    slt a0, a3, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    sd a3, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: saddo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: saddo3.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, -4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: uaddo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
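The unsigned add cases are simpler: after the wrapping addw, a carry-out happened iff the result is (unsigned) smaller than an operand, which one sltu captures; the +1 special case collapses to a seqz on the sum. A hedged C++ sketch (uaddo_i32 is an invented name):

  #include <cstdint>

  bool uaddo_i32(uint32_t a, uint32_t b, uint32_t *sum) {
    uint32_t s = a + b; // addw
    *sum = s;
    return s < a;       // sltu: the sum wrapped below a iff there was a carry
  }
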
+define zeroext i1 @uaddo.i32.constant(i32 signext %v1, ptr %res) {
+; RV64-LABEL: uaddo.i32.constant:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, -2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 -2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32.constant_one(i32 signext %v1, ptr %res) {
+; RV64-LABEL: uaddo.i32.constant_one:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: uaddo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_one(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_one:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 1)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: ssubo1.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a3, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
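ssubo uses the subtraction analogue of the saddo identity: for a wrapping diff = a - b, signed overflow occurred iff (b > 0) != (diff < a), which is exactly the sgtz/slt/xor triple above. Restated in C++ (assumed sketch; ssubo_i32 is an invented name):

  #include <cstdint>

  bool ssubo_i32(int32_t a, int32_t b, int32_t *diff) {
    int32_t d = (int32_t)((uint32_t)a - (uint32_t)b); // wrapping sub (subw)
    *diff = d;
    return (b > 0) != (d < a);                        // sgtz + slt + xor
  }
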
+define zeroext i1 @ssubo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: ssubo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: ssubo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a3, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: usubo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
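For unsigned subtraction the borrow condition is a < b, but the lowering instead tests a < (a - b) so the compare can reuse the difference it already computed; the two are equivalent for a wrapping subtract. C++ sketch (invented name, illustration only):

  #include <cstdint>

  bool usubo_i32(uint32_t a, uint32_t b, uint32_t *diff) {
    uint32_t d = a - b; // subw
    *diff = d;
    return a < d;       // sltu a0, a0, a1 -- same truth value as a < b
  }
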
+define zeroext i1 @usubo.i32.constant.rhs(i32 signext %v1, ptr %res) {
+; RV64-LABEL: usubo.i32.constant.rhs:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 2
+; RV64-NEXT:    sltu a0, a0, a2
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 -2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) {
+; RV64-LABEL: usubo.i32.constant.lhs:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, -2
+; RV64-NEXT:    subw a2, a2, a0
+; RV64-NEXT:    addi a0, a2, 1
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 -2, i32 %v1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: usubo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: smulo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 32
+; RV64-NEXT:    sraiw a3, a1, 31
+; RV64-NEXT:    xor a0, a0, a3
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
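With i32 legal on RV64, smulo.i32 multiplies in the full 64-bit registers and flags overflow when the product no longer sign-extends from bit 31, i.e. when bits [63:32] differ from bit 31 replicated; that is the srai 32 versus sraiw 31 comparison above. A C++ restatement (assumed sketch, not the backend code):

  #include <cstdint>

  bool smulo_i32(int32_t a, int32_t b, int32_t *res) {
    int64_t p = (int64_t)a * (int64_t)b;              // mul
    *res = (int32_t)p;                                // low 32 bits
    return (p >> 32) != ((int64_t)(int32_t)p >> 31);  // srai 32 vs. sraiw 31
  }
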
+define zeroext i1 @smulo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: smulo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    mul a2, a0, a2
+; RV64-NEXT:    srai a0, a2, 32
+; RV64-NEXT:    sraiw a3, a2, 31
+; RV64-NEXT:    xor a0, a0, a3
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 13)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: smulo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a3, a0, a1
+; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 63
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: smulo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    mulh a3, a0, a2
+; RV64-NEXT:    mul a2, a0, a2
+; RV64-NEXT:    srai a0, a2, 63
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: umulo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 32
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
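umulo.i32 shifts both operands up by 32 so that a single mulhu of the shifted values yields the full 64-bit unsigned product of the original i32 inputs; any nonzero bits in [63:32] of that product mean overflow, and the low half is the result. In C++ terms (assumed sketch, invented name):

  #include <cstdint>

  bool umulo_i32(uint32_t a, uint32_t b, uint32_t *res) {
    // mulhu((uint64_t)a << 32, (uint64_t)b << 32) equals this plain product.
    uint64_t p = (uint64_t)a * (uint64_t)b;
    *res = (uint32_t)p;     // low 32 bits
    return (p >> 32) != 0;  // srai/srli 32 + snez
  }
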
+define zeroext i1 @umulo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: umulo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a2, a0, a2
+; RV64-NEXT:    srli a0, a2, 32
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 13)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Similar to umulo.i32, but storing the overflow and returning the result.
+define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
+; RV64-LABEL: umulo3.i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+  %4 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1)
+  %5 = extractvalue { i32, i1 } %4, 1
+  %6 = extractvalue { i32, i1 } %4, 0
+  %7 = zext i1 %5 to i32
+  store i32 %7, ptr %2, align 4
+  ret i32 %6
+}
+
+define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: umulo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a3, a0, a1
+; RV64-NEXT:    snez a3, a3
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: umulo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a3, 13
+; RV64-NEXT:    mulhu a2, a0, a3
+; RV64-NEXT:    snez a2, a2
+; RV64-NEXT:    mul a0, a0, a3
+; RV64-NEXT:    sd a0, 0(a1)
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+
+;
+; Check the use of the overflow bit in combination with a select instruction.
+;
+define i32 @saddo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a2, a2, a0
+; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    bne a3, a2, .LBB28_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB28_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @saddo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a2, a2, a0
+; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    bne a3, a2, .LBB30_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB30_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @uaddo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: uaddo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    bltu a2, a0, .LBB32_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB32_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @uaddo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: uaddo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    bltu a2, a0, .LBB34_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB34_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @ssubo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    slt a3, a3, a0
+; RV64-NEXT:    bne a2, a3, .LBB36_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB36_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssubo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    slt a3, a3, a0
+; RV64-NEXT:    bne a2, a3, .LBB38_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB38_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @ssub.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssub.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @usubo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    bltu a0, a2, .LBB40_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB40_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @usubo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    bltu a0, a2, .LBB42_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB42_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @smulo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a2, a0, a1
+; RV64-NEXT:    srai a3, a2, 32
+; RV64-NEXT:    sraiw a2, a2, 31
+; RV64-NEXT:    bne a3, a2, .LBB44_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB44_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @smulo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    sraiw a0, a0, 31
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a3, a0, a1
+; RV64-NEXT:    srai a3, a3, 63
+; RV64-NEXT:    bne a2, a3, .LBB46_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB46_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @umulo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    slli a3, a0, 32
+; RV64-NEXT:    mulhu a2, a3, a2
+; RV64-NEXT:    srai a2, a2, 32
+; RV64-NEXT:    bnez a2, .LBB48_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB48_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @umulo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 32
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a2, a0, a1
+; RV64-NEXT:    bnez a2, .LBB50_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB50_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+
+;
+; Check the use of the overflow bit in combination with a branch instruction.
+;
+define zeroext i1 @saddo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB52_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
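In the .br tests the overflow bit is never materialized into a register result: saddo.br.i32 branches with beq directly on the two halves of the check instead of combining them with xor/xori first. At the source level this is just branching on the intrinsic's i1, roughly (hedged C++ equivalent of the IR above):

  #include <cstdint>

  bool saddo_br_i32(int32_t a, int32_t b) {
    int32_t s;
    if (__builtin_add_overflow(a, b, &s)) // the obit feeds the branch directly
      return false;                       // %overflow block
    return true;                          // %continue block
  }
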
+define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    beq a1, a0, .LBB53_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB53_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
+; RV64-LABEL: uaddo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB54_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB54_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    bgeu a1, a0, .LBB55_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB55_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    beq a2, a0, .LBB56_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB56_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssubo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    beq a2, a0, .LBB57_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB57_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    bgeu a0, a1, .LBB58_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB58_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    bgeu a0, a1, .LBB59_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB59_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    sraiw a0, a0, 31
+; RV64-NEXT:    beq a1, a0, .LBB60_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB60_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    beq a2, a0, .LBB61_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB61_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo2.br.i64(i64 %v1) {
+; RV64-LABEL: smulo2.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, -13
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    beq a2, a0, .LBB62_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB62_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 -13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 32
+; RV64-NEXT:    beqz a0, .LBB63_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB63_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    beqz a0, .LBB64_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB64_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo2.br.i64(i64 %v1) {
+; RV64-LABEL: umulo2.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB65_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB65_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.i64.constant(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2048(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_2048:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2047
+; RV64-NEXT:    addi a2, a2, 1
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2048)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2049(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_2049:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2047
+; RV64-NEXT:    addi a2, a2, 2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2049)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define i64 @uaddo.i64.constant_setcc_on_overflow_flag(ptr %p) {
+; RV64-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    addi a0, a1, 2
+; RV64-NEXT:    bltu a0, a1, .LBB69_2
+; RV64-NEXT:  # %bb.1: # %IfOverflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV64-NEXT:    ret
+entry:
+  %v1 = load i64, ptr %p
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %IfNoOverflow, label %IfOverflow
+IfOverflow:
+  ret i64 0
+IfNoOverflow:
+  ret i64 %val
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+