[llvm] [RISCV] Add experimental support for making i32 a legal type on RV64 in SelectionDAG. (PR #70357)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 15:26:15 PDT 2023


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/70357

From 2ac63854797f0f449ba078e287cda00d35eb5be3 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 17 Sep 2023 09:44:10 -0700
Subject: [PATCH] [RISCV] Add experimental support for making i32 a legal type
 on RV64 in SelectionDAG.

This will select i32 operations directly to W instructions
without custom nodes. Hopefully this makes us less dependent
on hasAllNBitUsers to recover i32 operations in
RISCVISelDAGToDAG.cpp.

This support is enabled with a command line option that is off by
default.
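
As a rough sketch (not one of the new tests verbatim), a test like
the following should now select an i32 add directly to ADDW through
the new i32 patterns when the option is passed to llc:

  ; RUN: llc -mtriple=riscv64 -riscv-experimental-rv64-legal-i32 < %s \
  ; RUN:   | FileCheck %s

  define i32 @add_i32(i32 %a, i32 %b) {
  ; CHECK-LABEL: add_i32:
  ; CHECK: addw
    %c = add i32 %a, %b
    ret i32 %c
  }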

The generated code is still far from optimal.

I've duplicated many test cases for this, but it's not complete.
I believe all existing lit tests run without crashing when this
option is enabled.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   19 +-
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |    2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |    2 +-
 llvm/lib/Target/RISCV/RISCVGISel.td           |   47 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |   12 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  332 ++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |    6 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |   59 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td      |   58 +
 llvm/lib/Target/RISCV/RISCVInstrInfoD.td      |    4 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoF.td      |   16 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoM.td      |   15 +
 llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td |   14 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td     |   26 +
 .../lib/Target/RISCV/RISCVInstrInfoZfbfmin.td |    2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td    |   16 +-
 .../CodeGen/RISCV/rv64-legal-i32/alu32.ll     |  240 ++
 llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll |  699 +++++
 llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll | 2564 +++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll |   92 +
 .../CodeGen/RISCV/rv64-legal-i32/mem64.ll     |  341 +++
 llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll |  390 +++
 .../RISCV/rv64-legal-i32/rv64xtheadbb.ll      |  902 ++++++
 .../CodeGen/RISCV/rv64-legal-i32/rv64zba.ll   | 1798 ++++++++++++
 .../RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll |   77 +
 .../RISCV/rv64-legal-i32/rv64zbb-zbkb.ll      |  600 ++++
 .../CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll   | 1068 +++++++
 .../RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll |   42 +
 .../rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll  |   67 +
 .../rv64-legal-i32/rv64zbkb-intrinsic.ll      |   73 +
 .../CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll   | 1000 +++++++
 .../CodeGen/RISCV/rv64-legal-i32/xaluo.ll     | 1308 +++++++++
 32 files changed, 11728 insertions(+), 163 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f19beea3a3ed8b7..82751a442dbc3bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5023,6 +5023,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::SREM:
   case ISD::UDIV:
   case ISD::UREM:
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR: {
@@ -5039,12 +5043,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
         break;
       case ISD::SDIV:
       case ISD::SREM:
+      case ISD::SMIN:
+      case ISD::SMAX:
         ExtOp = ISD::SIGN_EXTEND;
         break;
       case ISD::UDIV:
       case ISD::UREM:
         ExtOp = ISD::ZERO_EXTEND;
         break;
+      case ISD::UMIN:
+      case ISD::UMAX:
+        if (TLI.isSExtCheaperThanZExt(OVT, NVT))
+          ExtOp = ISD::SIGN_EXTEND;
+        else
+          ExtOp = ISD::ZERO_EXTEND;
+        break;
       }
       TruncOp = ISD::TRUNCATE;
     }
@@ -5166,7 +5179,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     unsigned ExtOp = ISD::FP_EXTEND;
     if (NVT.isInteger()) {
       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
-      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      if (isSignedIntSetCC(CCCode) ||
+          TLI.isSExtCheaperThanZExt(Node->getOperand(0).getValueType(), NVT))
+        ExtOp = ISD::SIGN_EXTEND;
+      else
+        ExtOp = ISD::ZERO_EXTEND;
     }
     if (Node->isStrictFPOpcode()) {
       SDValue InChain = Node->getOperand(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5bd04e2360679d4..2d2585b3db73226 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -371,7 +371,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
         N->getMemOperand());
     ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
     ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
-    return Res.getValue(1);
+    return DAG.getSExtOrTrunc(Res.getValue(1), SDLoc(N), NVT);
   }
 
   // Op2 is used for the comparison and thus must be extended according to the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 229f220d8460bda..29505f7505ba25c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3468,7 +3468,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
     }
 
     if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
-        (TLI.isOperationLegalOrCustom(Opc, VT) ||
+        (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
          (UseScalarMinMax &&
           TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
         // If the underlying comparison instruction is used by any other
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index fcac2f365596260..458bf9a2efde4d6 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -21,8 +21,6 @@ def simm12Plus1 : ImmLeaf<XLenVT, [{
 def simm12Plus1i32 : ImmLeaf<i32, [{
     return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
 
-def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
-
 def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
 
 // FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate
@@ -49,11 +47,6 @@ def GIAddrRegImm :
   GIComplexOperandMatcher<s32, "selectAddrRegImm">,
   GIComplexPatternEquiv<AddrRegImm>;
 
-// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
-// checking happy so we can use ADDIW which expects an XLen immediate.
-def as_i64imm : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
-}]>;
 def gi_as_i64imm : GICustomOperandRenderer<"renderImm">,
   GISDNodeXFormEquiv<as_i64imm>;
 
@@ -88,14 +81,10 @@ def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
           (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
 
 let Predicates = [IsRV64] in {
-def : Pat<(i32 (add GPR:$rs1, GPR:$rs2)), (ADDW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (sub GPR:$rs1, GPR:$rs2)), (SUBW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>;
 
-def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
-          (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
 def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
           (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
 
@@ -116,19 +105,6 @@ def : Pat<(i32 (sra GPR:$rs1, uimm5i32:$imm)),
           (SRAIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
 def : Pat<(i32 (srl GPR:$rs1, uimm5i32:$imm)),
           (SRLIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
-
-def : Pat<(i64 (sext i32:$rs)), (ADDIW GPR:$rs, 0)>;
-}
-
-let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
-def : Pat<(i32 (mul GPR:$rs1, GPR:$rs2)), (MULW GPR:$rs1, GPR:$rs2)>;
-}
-
-let Predicates = [HasStdExtM, IsRV64] in {
-def : Pat<(i32 (sdiv GPR:$rs1, GPR:$rs2)), (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (srem GPR:$rs1, GPR:$rs2)), (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (udiv GPR:$rs1, GPR:$rs2)), (DIVUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i32 (urem GPR:$rs1, GPR:$rs2)), (REMUW GPR:$rs1, GPR:$rs2)>;
 }
 
 let Predicates = [HasStdExtZba, IsRV64] in {
@@ -136,13 +112,8 @@ let Predicates = [HasStdExtZba, IsRV64] in {
 // in SDISel for RV64, which is not the case in GISel.
 def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt),
           (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
-
-def : Pat<(i64 (zext i32:$rs)), (ADD_UW GPR:$rs, (XLenVT X0))>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
-let Predicates = [IsRV64, NotHasStdExtZba] in
-def: Pat<(i64 (zext i32:$rs)), (SRLI (SLLI GPR:$rs, 32), 32)>;
-
 // Ptr type used in patterns with GlobalISelEmitter
 def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
 
@@ -196,8 +167,6 @@ def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty GPR:$rs2))),
           (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
 }
 
-// Define pattern expansions for load/extload and store/truncstore operations
-// for ptr return type
 let Predicates = [IsRV32] in {
 def : LdPat<load, LW, PtrVT>;
 def : StPat<store, SW, GPR, PtrVT>;
@@ -206,18 +175,4 @@ def : StPat<store, SW, GPR, PtrVT>;
 let Predicates = [IsRV64] in {
 def : LdPat<load, LD, PtrVT>;
 def : StPat<store, SD, GPR, PtrVT>;
-
-// Define pattern expansions for rv64 load/extloads and store/truncstore
-// operations for i32 return type
-def : LdPat<sextloadi8, LB, i32>;
-def : LdPat<extloadi8, LBU, i32>;
-def : LdPat<zextloadi8, LBU, i32>;
-def : LdPat<sextloadi16, LH, i32>;
-def : LdPat<extloadi16, LH, i32>;
-def : LdPat<zextloadi16, LHU, i32>;
-def : LdPat<load, LW, i32>;
-
-def : StPat<truncstorei8, SB, GPR, i32>;
-def : StPat<truncstorei16, SH, GPR, i32>;
-def : StPat<store, SW, GPR, i32>;
-} // Predicates = [IsRV64]
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c2cac993fe13c4b..f3fba4136e96502 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -67,8 +67,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
       SDLoc DL(N);
       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
-      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
-                               N->getOperand(0), VL);
+      SDValue Src = N->getOperand(0);
+      if (VT.isInteger())
+        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
+                              N->getOperand(0));
+      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
       break;
     }
     case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
@@ -833,7 +836,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
   switch (Opcode) {
   case ISD::Constant: {
-    assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
+    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
     auto *ConstNode = cast<ConstantSDNode>(Node);
     if (ConstNode->isZero()) {
       SDValue New =
@@ -3287,6 +3290,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
   case RISCV::TH_MULAH:
   case RISCV::TH_MULSW:
   case RISCV::TH_MULSH:
+    if (N0.getValueType() == MVT::i32)
+      break;
+
     // Result is already sign extended just remove the sext.w.
     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
     ReplaceUses(N, N0.getNode());
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index beb371063f89b2d..364bfb6fc77947a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -75,6 +75,10 @@ static cl::opt<int>
                        "use for creating a floating-point immediate value"),
               cl::init(2));
 
+static cl::opt<bool>
+    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
+                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -115,6 +119,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   // Set up the register classes.
   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
+  if (Subtarget.is64Bit() && RV64LegalI32)
+    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
 
   if (Subtarget.hasStdExtZfhOrZfhmin())
     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
@@ -237,8 +243,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   setOperationAction(ISD::BR_CC, XLenVT, Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
 
   setCondCodeAction(ISD::SETLE, XLenVT, Expand);
   setCondCodeAction(ISD::SETGT, XLenVT, Custom);
@@ -247,6 +257,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
   setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
 
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SETCC, MVT::i32, Promote);
+
   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
 
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -262,14 +275,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
-    setOperationAction(ISD::LOAD, MVT::i32, Custom);
+    if (!RV64LegalI32)
+      setOperationAction(ISD::LOAD, MVT::i32, Custom);
 
-    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
-                       MVT::i32, Custom);
+    if (RV64LegalI32)
+      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::i32, Promote);
+    else
+      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+                         MVT::i32, Custom);
 
-    setOperationAction(ISD::SADDO, MVT::i32, Custom);
-    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
-                       MVT::i32, Custom);
+    if (!RV64LegalI32) {
+      setOperationAction(ISD::SADDO, MVT::i32, Custom);
+      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+                         MVT::i32, Custom);
+    }
   } else {
     setLibcallName(
         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -277,19 +296,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 
-  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
+  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
     setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
-  else if (Subtarget.is64Bit())
-    setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
-  else
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::MUL, MVT::i32, Promote);
+  } else if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::MUL, MVT::i128, Custom);
+    if (!RV64LegalI32)
+      setOperationAction(ISD::MUL, MVT::i32, Custom);
+  } else {
     setOperationAction(ISD::MUL, MVT::i64, Custom);
+  }
 
-  if (!Subtarget.hasStdExtM())
+  if (!Subtarget.hasStdExtM()) {
     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                        XLenVT, Expand);
-  else if (Subtarget.is64Bit())
-    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
-                       {MVT::i8, MVT::i16, MVT::i32}, Custom);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+                         Promote);
+  } else if (Subtarget.is64Bit()) {
+    if (!RV64LegalI32)
+      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+                         {MVT::i8, MVT::i16, MVT::i32}, Custom);
+  }
+
+  if (RV64LegalI32 && Subtarget.is64Bit()) {
+    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
+    setOperationAction(
+        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
+        Expand);
+  }
 
   setOperationAction(
       {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
@@ -299,7 +335,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                      Custom);
 
   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
-    if (Subtarget.is64Bit())
+    if (!RV64LegalI32 && Subtarget.is64Bit())
       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
   } else if (Subtarget.hasVendorXTHeadBb()) {
     if (Subtarget.is64Bit())
@@ -307,6 +343,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
   } else {
     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
   }
 
   // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
@@ -316,6 +354,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                       Subtarget.hasVendorXTHeadBb())
                          ? Legal
                          : Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::BSWAP, MVT::i32,
+                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+                        Subtarget.hasVendorXTHeadBb())
+                           ? Promote
+                           : Expand);
+
   // Zbkb can use rev8+brev8 to implement bitreverse.
   setOperationAction(ISD::BITREVERSE, XLenVT,
                      Subtarget.hasStdExtZbkb() ? Custom : Expand);
@@ -323,30 +368,49 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtZbb()) {
     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                        Legal);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
+                         Promote);
 
-    if (Subtarget.is64Bit())
-      setOperationAction(
-          {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF},
-          MVT::i32, Custom);
+    if (Subtarget.is64Bit()) {
+      if (RV64LegalI32)
+        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+      else
+        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
+    }
   } else {
     setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
   }
 
   if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
     // We need the custom lowering to make sure that the resulting sequence
     // for the 32bit case is efficient on 64bit targets.
-    if (Subtarget.is64Bit())
-      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+    if (Subtarget.is64Bit()) {
+      if (RV64LegalI32) {
+        setOperationAction(ISD::CTLZ, MVT::i32,
+                           Subtarget.hasStdExtZbb() ? Legal : Promote);
+        if (!Subtarget.hasStdExtZbb())
+          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
+      } else
+        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+    }
   } else {
     setOperationAction(ISD::CTLZ, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
   }
 
-  if (Subtarget.is64Bit())
+  if (!RV64LegalI32 && Subtarget.is64Bit())
     setOperationAction(ISD::ABS, MVT::i32, Custom);
 
   if (!Subtarget.hasVendorXTHeadCondMov())
     setOperationAction(ISD::SELECT, XLenVT, Custom);
 
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::SELECT, MVT::i32, Promote);
+
   static const unsigned FPLegalNodeTypes[] = {
       ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
       ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
@@ -525,6 +589,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                         ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                        XLenVT, Legal);
 
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+                         MVT::i32, Legal);
+
     setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
@@ -569,6 +638,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setBooleanVectorContents(ZeroOrOneBooleanContent);
 
     setOperationAction(ISD::VSCALE, XLenVT, Custom);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::VSCALE, MVT::i32, Custom);
 
     // RVV intrinsics may have illegal operands.
     // We also need to custom legalize vmv.x.s.
@@ -1247,8 +1318,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget.hasStdExtA())
+  if (Subtarget.hasStdExtA()) {
     setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+    if (RV64LegalI32 && Subtarget.is64Bit())
+      setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+  }
 
   if (Subtarget.hasForcedAtomics()) {
     // Force __sync libcalls to be emitted for atomic rmw/cas operations.
@@ -2079,7 +2153,12 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
       !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
     return MVT::f32;
 
-  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+  MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
+    return MVT::i64;
+
+  return PartVT;
 }
 
 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
@@ -2094,6 +2173,21 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
 
+unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+  unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
+    IntermediateVT = MVT::i64;
+
+  if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
+    RegisterVT = MVT::i64;
+
+  return NumRegs;
+}
+
 // Changes the condition code and swaps operands if necessary, so the SetCC
 // operation matches one of the comparisons supported directly by branches
 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
@@ -3252,6 +3346,8 @@ static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
       auto OpCode =
         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+      if (!VT.isFloatingPoint())
+        LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
       Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                         LastOp, Mask, VL);
       Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
@@ -3379,6 +3475,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
+    if (!VT.isFloatingPoint())
+      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
     Splat =
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3612,10 +3710,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
       // For a splat, perform a scalar truncate before creating the wider
       // vector.
-      assert(Splat.getValueType() == XLenVT &&
-             "Unexpected type for i1 splat value");
-      Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
-                          DAG.getConstant(1, DL, XLenVT));
+      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
+                          DAG.getConstant(1, DL, Splat.getValueType()));
       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
     } else {
       SmallVector<SDValue, 8> Ops(Op->op_values());
@@ -3632,6 +3728,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       return Gather;
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
+    if (!VT.isFloatingPoint())
+      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
     Splat =
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3693,7 +3791,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   SDValue Vec = DAG.getUNDEF(ContainerVT);
   UndefCount = 0;
-  for (const SDValue &V : Op->ops()) {
+  for (SDValue V : Op->ops()) {
     if (V.isUndef()) {
       UndefCount++;
       continue;
@@ -3706,6 +3804,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     }
     auto OpCode =
       VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+    if (!VT.isFloatingPoint())
+      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
     Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
                       V, Mask, VL);
   }
@@ -4266,6 +4366,8 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
   auto OpCode = IsVSlidedown ?
     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+  if (!VT.isFloatingPoint())
+    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
   auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT),
                          convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
@@ -5151,10 +5253,12 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
     return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
   }
 
-  SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
-  SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
-  return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
-                      ISD::CondCode::SETNE);
+  SDValue FPCLASS =
+      DAG.getNode(RISCVISD::FPCLASS, DL, XLenVT, Op.getOperand(0));
+  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FPCLASS, TDCMaskV);
+  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
+                             ISD::CondCode::SETNE);
+  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
 }
 
 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
@@ -5662,6 +5766,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (VT.isFixedLengthVector())
       ContainerVT = getContainerForFixedLengthVector(VT);
     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
     SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), Scalar, VL);
     if (VT.isFixedLengthVector())
@@ -5669,9 +5774,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return V;
   }
   case ISD::VSCALE: {
+    MVT XLenVT = Subtarget.getXLenVT();
     MVT VT = Op.getSimpleValueType();
     SDLoc DL(Op);
-    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+    SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
     // We define our scalable vector types for lmul=1 to use a 64 bit known
     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
     // vscale as VLENB / 8.
@@ -5684,22 +5790,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (isPowerOf2_64(Val)) {
       uint64_t Log2 = Log2_64(Val);
       if (Log2 < 3)
-        return DAG.getNode(ISD::SRL, DL, VT, VLENB,
-                           DAG.getConstant(3 - Log2, DL, VT));
-      if (Log2 > 3)
-        return DAG.getNode(ISD::SHL, DL, VT, VLENB,
-                           DAG.getConstant(Log2 - 3, DL, VT));
-      return VLENB;
-    }
-    // If the multiplier is a multiple of 8, scale it down to avoid needing
-    // to shift the VLENB value.
-    if ((Val % 8) == 0)
-      return DAG.getNode(ISD::MUL, DL, VT, VLENB,
-                         DAG.getConstant(Val / 8, DL, VT));
-
-    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
-                                 DAG.getConstant(3, DL, VT));
-    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+        Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+                          DAG.getConstant(3 - Log2, DL, VT));
+      else if (Log2 > 3)
+        Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
+                          DAG.getConstant(Log2 - 3, DL, XLenVT));
+    } else if ((Val % 8) == 0) {
+      // If the multiplier is a multiple of 8, scale it down to avoid needing
+      // to shift the VLENB value.
+      Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
+                        DAG.getConstant(Val / 8, DL, XLenVT));
+    } else {
+      SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+                                   DAG.getConstant(3, DL, XLenVT));
+      Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
+                        DAG.getConstant(Val, DL, XLenVT));
+    }
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
   }
   case ISD::FPOWI: {
     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
@@ -5947,7 +6054,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
     SDValue Res =
         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
-    if (Subtarget.is64Bit())
+    if (Subtarget.is64Bit() && !RV64LegalI32)
       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
     return DAG.getBitcast(MVT::i32, Res);
   }
@@ -5976,7 +6083,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
     SDValue Res =
         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
-    if (Subtarget.is64Bit())
+    if (Subtarget.is64Bit() && !RV64LegalI32)
       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
     return DAG.getBitcast(MVT::i32, Res);
   }
@@ -7144,12 +7251,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
   }
-  MVT XLenVT = Subtarget.getXLenVT();
-  assert(SplatVal.getValueType() == XLenVT &&
-         "Unexpected type for i1 splat value");
   MVT InterVT = VT.changeVectorElementType(MVT::i8);
-  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
-                         DAG.getConstant(1, DL, XLenVT));
+  SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
+                         DAG.getConstant(1, DL, SplatVal.getValueType()));
   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
   SDValue Zero = DAG.getConstant(0, DL, InterVT);
   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
@@ -7578,6 +7682,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     unsigned Opc =
         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
     if (isNullConstant(Idx)) {
+      if (!VecVT.isFloatingPoint())
+        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
 
       if (ContainerVT != OrigContainerVT)
@@ -7682,8 +7788,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
       auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
       SDValue Vfirst =
           DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
-      return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
-                          ISD::SETEQ);
+      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
+                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
     }
     if (VecVT.isFixedLengthVector()) {
       unsigned NumElts = VecVT.getVectorNumElements();
@@ -7721,8 +7828,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
         // Extract the bit from GPR.
         SDValue ShiftRight =
             DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
-        return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
-                           DAG.getConstant(1, DL, XLenVT));
+        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+                                  DAG.getConstant(1, DL, XLenVT));
+        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
       }
     }
     // Otherwise, promote to an i8 vector and extract from that.
@@ -8017,7 +8125,9 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
   SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
 
   SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+  SDValue Res =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
 }
 
 // LMUL * VLEN should be greater than or equal to EGS * SEW
@@ -8061,12 +8171,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
     }
 
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
   case Intrinsic::riscv_sm4ks:
   case Intrinsic::riscv_sm4ed: {
     unsigned Opc =
         IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      SDValue Res =
+          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                        Op.getOperand(3));
   }
@@ -8077,20 +8205,43 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
   case Intrinsic::riscv_clmul:
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
     return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::riscv_clmulh:
-    return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case Intrinsic::riscv_clmulr:
-    return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
-                       Op.getOperand(2));
+  case Intrinsic::riscv_clmulr: {
+    unsigned Opc =
+        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
+    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+                           DAG.getConstant(32, DL, MVT::i64));
+      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+                           DAG.getConstant(32, DL, MVT::i64));
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
+      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
+                        DAG.getConstant(32, DL, MVT::i64));
+      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+    }
+
+    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
+  }
   case Intrinsic::experimental_get_vector_length:
     return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
-  case Intrinsic::riscv_vmv_x_s:
-    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
-    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
-                       Op.getOperand(1));
+  case Intrinsic::riscv_vmv_x_s: {
+    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
+    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
+  }
   case Intrinsic::riscv_vfmv_f_s:
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                        Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
@@ -8610,8 +8761,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
          "Unexpected reduction lowering");
 
   MVT XLenVT = Subtarget.getXLenVT();
-  assert(Op.getValueType() == XLenVT &&
-         "Expected reduction output to be legalized to XLenVT");
 
   MVT ContainerVT = VecVT;
   if (VecVT.isFixedLengthVector()) {
@@ -8665,6 +8814,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
   }
 
   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+  SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
 
   if (!IsVP)
     return SetCC;
@@ -8675,7 +8825,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
   // 0 for an inactive vector, and so we've already received the neutral value:
   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
   // can simply include the start value.
-  return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
+  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
 }
 
 static bool isNonZeroAVL(SDValue AVL) {
@@ -10570,6 +10720,8 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
 
+  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
+
   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                               DAG.getConstant(2, DL, XLenVT));
   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
@@ -16721,12 +16873,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
     break;
   case CCValAssign::BCvt:
     if (VA.getLocVT().isInteger() &&
-        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
-    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
-      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
-    else
+    } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      if (RV64LegalI32) {
+        Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
+        Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+      } else {
+        Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+      }
+    } else {
       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+    }
     break;
   }
   return Val;
@@ -16780,13 +16938,19 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
     break;
   case CCValAssign::BCvt:
-    if (VA.getLocVT().isInteger() &&
-        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
-      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
-    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
-      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
-    else
+    if (LocVT.isInteger() &&
+        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
+      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
+    } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
+      if (RV64LegalI32) {
+        Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
+        Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
+      } else {
+        Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+      }
+    } else {
       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+    }
     break;
   }
   return Val;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5ca6376f858c44d..d4e18f0f94a8366 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -487,6 +487,12 @@ class RISCVTargetLowering : public TargetLowering {
                                          CallingConv::ID CC,
                                          EVT VT) const override;
 
+  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+                                                CallingConv::ID CC, EVT VT,
+                                                EVT &IntermediateVT,
+                                                unsigned &NumIntermediates,
+                                                MVT &RegisterVT) const override;
+
   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                             EVT VT) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 1a9242cff0b445d..71ba4025b6a07e9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1183,11 +1183,13 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
 
 class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
     : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
-    : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
+                ValueType vt2 = XLenVT>
+    : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
 
-class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
-    : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
+class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
+                ValueType vt = XLenVT>
+    : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)),
           (Inst GPR:$rs1, ImmType:$imm)>;
 class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
     : PatGprImm<OpNode, Inst, simm12>;
@@ -1744,7 +1746,7 @@ def : LdPat<sextloadi8, LB>;
 def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
 def : LdPat<sextloadi16, LH>;
 def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
+def : LdPat<load, LW, i32>;
 def : LdPat<zextloadi8, LBU>;
 def : LdPat<zextloadi16, LHU>;
 
@@ -1758,7 +1760,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
 
 def : StPat<truncstorei8, SB, GPR, XLenVT>;
 def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
+def : StPat<store, SW, GPR, i32>;
 
 /// Fences
 
@@ -1992,6 +1994,51 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
                  (AddiPairImmSmall AddiPair:$rs2))>;
 }
 
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
+
+// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
+// checking happy so we can use ADDIW which expects an XLen immediate.
+def as_i64imm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+let Predicates = [IsRV64] in {
+def : LdPat<sextloadi8, LB, i32>;
+def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
+def : LdPat<sextloadi16, LH, i32>;
+def : LdPat<extloadi16, LH, i32>;
+def : LdPat<zextloadi8, LBU, i32>;
+def : LdPat<zextloadi16, LHU, i32>;
+
+def : StPat<truncstorei8, SB, GPR, i32>;
+def : StPat<truncstorei16, SH, GPR, i32>;
+
+def : Pat<(anyext GPR:$src), (COPY GPR:$src)>;
+def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>;
+def : Pat<(trunc GPR:$src), (COPY GPR:$src)>;
+
+def : PatGprGpr<add, ADDW, i32, i32>;
+def : PatGprGpr<sub, SUBW, i32, i32>;
+def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>;
+def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>;
+def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>;
+
+def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
+          (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
+
+def : PatGprImm<shl, SLLIW, uimm5, i32>;
+def : PatGprImm<srl, SRLIW, uimm5, i32>;
+def : PatGprImm<sra, SRAIW, uimm5, i32>;
+}
+
+let Predicates = [IsRV64, NotHasStdExtZba] in {
+def : Pat<(zext GPR:$src), (SRLI (SLLI GPR:$src, 32), 32)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index c43af14bb7f7005..5a3d393bdb599e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -372,3 +372,61 @@ def : Pat<(int_riscv_masked_cmpxchg_i64
           (PseudoMaskedCmpXchg32
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
 } // Predicates = [HasStdExtA, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+class PatGprGprA<SDPatternOperator OpNode, RVInst Inst, ValueType vt>
+    : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+
+multiclass AMOPat2<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+                   list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+                   !cast<RVInst>(BaseInst#"_AQ"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+                   !cast<RVInst>(BaseInst#"_RL"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                   !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                   !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+                   !cast<RVInst>(BaseInst), vt>;
+  def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+                   !cast<RVInst>(BaseInst), vt>;
+}
+}
+
+defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>;
+defm : AMOPat2<"atomic_load_add_32", "AMOADD_W", i32>;
+defm : AMOPat2<"atomic_load_and_32", "AMOAND_W", i32>;
+defm : AMOPat2<"atomic_load_or_32", "AMOOR_W", i32>;
+defm : AMOPat2<"atomic_load_xor_32", "AMOXOR_W", i32>;
+defm : AMOPat2<"atomic_load_max_32", "AMOMAX_W", i32>;
+defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
+defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
+defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
+
+let Predicates = [HasAtomicLdSt] in {
+  def : LdPat<atomic_load_8,  LB, i32>;
+  def : LdPat<atomic_load_16, LH, i32>;
+  def : LdPat<atomic_load_32, LW, i32>;
+
+  def : StPat<atomic_store_8,  SB, GPR, i32>;
+  def : StPat<atomic_store_16, SH, GPR, i32>;
+  def : StPat<atomic_store_32, SW, GPR, i32>;
+}
+
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 34becfafe77473d..f3794c8a0433b1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -538,7 +538,7 @@ def SplitF64Pseudo_INX
              [(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>;
 } // Predicates = [HasStdExtZdinx, IsRV32]
 
-let Predicates = [HasStdExtD, IsRV32] in {
+let Predicates = [HasStdExtD] in {
 
 // double->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>;
@@ -557,7 +557,7 @@ def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>;
 // [u]int->double.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1, FRM_RNE)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1, FRM_RNE)>;
-} // Predicates = [HasStdExtD, IsRV32]
+} // Predicates = [HasStdExtD]
 
 let Predicates = [HasStdExtZdinx, IsRV32] in {
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 3a5794bb2d19474..32a66882fcd54d4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -680,19 +680,19 @@ def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm
           (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
 } // Predicates = [HasStdExtZfinx]
 
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
 // Moves (no conversion)
 def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
 def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
 
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
 // Moves (no conversion)
 def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
 def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
 
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
 // float->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>;
@@ -710,9 +710,9 @@ def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>;
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
 
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
 // float->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>;
@@ -730,7 +730,7 @@ def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>;
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
 
 let Predicates = [HasStdExtF, IsRV64] in {
 // Moves (no conversion)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 6c3c9a771d94b62..f9890ca4b0eec15 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -114,3 +114,18 @@ let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in {
 def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
           (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
 } // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
+def : PatGprGpr<mul, MULW, i32, i32>;
+}
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : PatGprGpr<sdiv, DIVW, i32, i32>;
+def : PatGprGpr<udiv, DIVUW, i32, i32>;
+def : PatGprGpr<srem, REMW, i32, i32>;
+def : PatGprGpr<urem, REMUW, i32, i32>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 41e139e3c7a9ebe..1d44b1ad26364e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -886,9 +886,7 @@ defm : StoreUpdatePat<post_truncsti8, TH_SBIA>;
 defm : StoreUpdatePat<pre_truncsti8, TH_SBIB>;
 defm : StoreUpdatePat<post_truncsti16, TH_SHIA>;
 defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>;
-}
 
-let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
 defm : StoreUpdatePat<post_store, TH_SWIA, i32>;
 defm : StoreUpdatePat<pre_store, TH_SWIB, i32>;
 }
@@ -899,3 +897,15 @@ defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>;
 defm : StoreUpdatePat<post_store, TH_SDIA, i64>;
 defm : StoreUpdatePat<pre_store, TH_SDIB, i64>;
 }
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : StoreUpdatePat<post_truncsti8, TH_SBIA, i32>;
+defm : StoreUpdatePat<pre_truncsti8, TH_SBIB, i32>;
+defm : StoreUpdatePat<post_truncsti16, TH_SHIA, i32>;
+defm : StoreUpdatePat<pre_truncsti16, TH_SHIB, i32>;
+}
+
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 4a62a61dadcf3bb..fec6396c602baad 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -812,3 +812,29 @@ let Predicates = [HasStdExtZbkx] in {
 def : PatGprGpr<int_riscv_xperm4, XPERM4>;
 def : PatGprGpr<int_riscv_xperm8, XPERM8>;
 } // Predicates = [HasStdExtZbkx]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<ctlz, CLZW, i32>;
+def : PatGpr<cttz, CTZW, i32>;
+def : PatGpr<ctpop, CPOPW, i32>;
+
+def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
+def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
+def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>;
+def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>;
+def : PatGprImm<rotr, RORIW, uimm5, i32>;
+
+def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)),
+          (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
index f3809f2abff695b..d819033eea68c70 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -62,7 +62,7 @@ def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
 def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
 } // Predicates = [HasStdExtZfbfmin]
 
-let Predicates = [HasStdExtZfbfmin, IsRV32] in {
+let Predicates = [HasStdExtZfbfmin] in {
 // bf16->[u]int. Round-to-zero must be used for the f32->int step, the
 // rounding mode has no effect for bf16->f32.
 def : Pat<(i32 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 1dc391d3f084fec..19d467f3b344c2a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -461,7 +461,7 @@ def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src,
 def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxOrZhinxmin]
 
-let Predicates = [HasStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfh] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>;
 def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>;
@@ -479,9 +479,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>;
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>;
 def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfh]
 
-let Predicates = [HasStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinx] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>;
 def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>;
@@ -499,7 +499,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>;
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinx]
 
 let Predicates = [HasStdExtZfh, IsRV64] in {
 // Use target specific isd nodes to help us remember the result is sign
@@ -597,7 +597,7 @@ def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2),
 def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64]
 
-let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
@@ -611,9 +611,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
 def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfhmin, NoStdExtZfh]
 
-let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in {
 // half->[u]int. Round-to-zero must be used.
 def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
 def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
@@ -627,7 +627,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FR
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
 def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
 def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx]
 
 let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
 // half->[u]int64. Round-to-zero must be used.
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
new file mode 100644
index 000000000000000..e4eca5c491edb18
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/alu32.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+
+; These tests are each targeted at a particular RISC-V ALU instruction. Most
+; other files in this folder exercise LLVM IR instructions that don't directly
+; match a RISC-V instruction.
+
+; Register-immediate instructions.
+
+define i32 @addi(i32 %a) nounwind {
+; RV64I-LABEL: addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addiw a0, a0, 1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+define i32 @slti(i32 %a) nounwind {
+; RV64I-LABEL: slti:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 2
+; RV64I-NEXT:    ret
+  %1 = icmp slt i32 %a, 2
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @sltiu(i32 %a) nounwind {
+; RV64I-LABEL: sltiu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltiu a0, a0, 3
+; RV64I-NEXT:    ret
+  %1 = icmp ult i32 %a, 3
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @xori(i32 %a) nounwind {
+; RV64I-LABEL: xori:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xori a0, a0, 4
+; RV64I-NEXT:    ret
+  %1 = xor i32 %a, 4
+  ret i32 %1
+}
+
+define i32 @ori(i32 %a) nounwind {
+; RV64I-LABEL: ori:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ori a0, a0, 5
+; RV64I-NEXT:    ret
+  %1 = or i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @andi(i32 %a) nounwind {
+; RV64I-LABEL: andi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 6
+; RV64I-NEXT:    ret
+  %1 = and i32 %a, 6
+  ret i32 %1
+}
+
+define i32 @slli(i32 %a) nounwind {
+; RV64I-LABEL: slli:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slliw a0, a0, 7
+; RV64I-NEXT:    ret
+  %1 = shl i32 %a, 7
+  ret i32 %1
+}
+
+define i32 @srli(i32 %a) nounwind {
+; RV64I-LABEL: srli:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 8
+; RV64I-NEXT:    ret
+  %1 = lshr i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @srai(i32 %a) nounwind {
+; RV64I-LABEL: srai:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a0, a0, 9
+; RV64I-NEXT:    ret
+  %1 = ashr i32 %a, 9
+  ret i32 %1
+}
+
+; Register-register instructions
+
+define i32 @add(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sub(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = sub i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sub_negative_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sub_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -2
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = sub i32 -2, %a
+  ret i32 %1
+}
+
+define i32 @sll(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sll:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sllw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = shl i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sll_negative_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sll_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sllw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = shl i32 -1, %a
+  ret i32 %1
+}
+
+define i32 @slt(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: slt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slt a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = icmp slt i32 %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @sltu(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: sltu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = icmp ult i32 %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @xor(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: xor:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = xor i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srl(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: srl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = lshr i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srl_negative_constant_lhs(i32 %a) nounwind {
+;
+; RV64I-LABEL: srl_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    srlw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = lshr i32 -1, %a
+  ret i32 %1
+}
+
+define i32 @sra(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: sra:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraw a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = ashr i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sra_negative_constant_lhs(i32 %a) nounwind {
+;
+; RV64I-LABEL: sra_negative_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    sraw a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = ashr i32 2147483648, %a
+  ret i32 %1
+}
+
+define i32 @or(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: or:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = or i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @and(i32 %a, i32 %b) nounwind {
+;
+; RV64I-LABEL: and:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+  %1 = and i32 %a, %b
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
new file mode 100644
index 000000000000000..f2228e9013ce9f1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
@@ -0,0 +1,699 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64IM %s
+
+define i32 @udiv(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: udiv:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @udiv_constant(i32 %a) nounwind {
+; RV64I-LABEL: udiv_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    lui a1, 838861
+; RV64IM-NEXT:    addi a1, a1, -819
+; RV64IM-NEXT:    slli a1, a1, 32
+; RV64IM-NEXT:    mulhu a0, a0, a1
+; RV64IM-NEXT:    srli a0, a0, 34
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @udiv_pow2(i32 %a) nounwind {
+; RV64I-LABEL: udiv_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @udiv_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: udiv_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i32 10, %a
+  ret i32 %1
+}
+
+define i64 @udiv64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: udiv64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @udiv64_constant(i64 %a) nounwind {
+; RV64I-LABEL: udiv64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a1, 838861
+; RV64IM-NEXT:    addiw a1, a1, -819
+; RV64IM-NEXT:    slli a2, a1, 32
+; RV64IM-NEXT:    add a1, a1, a2
+; RV64IM-NEXT:    mulhu a0, a0, a1
+; RV64IM-NEXT:    srli a0, a0, 2
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 %a, 5
+  ret i64 %1
+}
+
+define i64 @udiv64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: udiv64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __udivdi3@plt
+;
+; RV64IM-LABEL: udiv64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divu a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i64 10, %a
+  ret i64 %1
+}
+
+define i8 @udiv8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: udiv8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a1, a1, 255
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @udiv8_constant(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 205
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a0, a0, 10
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, 5
+  ret i8 %1
+}
+
+define i8 @udiv8_pow2(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 248
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 248
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 %a, 8
+  ret i8 %1
+}
+
+define i8 @udiv8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: udiv8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i8 10, %a
+  ret i8 %1
+}
+
+define i16 @udiv16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: udiv16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a2, 16
+; RV64IM-NEXT:    addi a2, a2, -1
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @udiv16_constant(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    lui a1, 13
+; RV64IM-NEXT:    addi a1, a1, -819
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a0, a0, 18
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, 5
+  ret i16 %1
+}
+
+define i16 @udiv16_pow2(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    srliw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    srliw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 %a, 8
+  ret i16 %1
+}
+
+define i16 @udiv16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: udiv16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a1, a0, 48
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: udiv16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    divuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = udiv i16 10, %a
+  ret i16 %1
+}
+
+define i32 @sdiv(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sdiv:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @sdiv_constant(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a0, a0
+; RV64IM-NEXT:    lui a1, 419430
+; RV64IM-NEXT:    addiw a1, a1, 1639
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 63
+; RV64IM-NEXT:    srai a0, a0, 33
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 5
+  ret i32 %1
+}
+
+define i32 @sdiv_pow2(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 3
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    sraiw a0, a0, 3
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_pow2_2(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_pow2_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_pow2_2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 %a, 65536
+  ret i32 %1
+}
+
+define i32 @sdiv_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: sdiv_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i32 -10, %a
+  ret i32 %1
+}
+
+define i64 @sdiv64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: sdiv64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    div a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @sdiv64_constant(i64 %a) nounwind {
+; RV64I-LABEL: sdiv64_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV64IM-NEXT:    ld a1, %lo(.LCPI21_0)(a1)
+; RV64IM-NEXT:    mulh a0, a0, a1
+; RV64IM-NEXT:    srli a1, a0, 63
+; RV64IM-NEXT:    srai a0, a0, 1
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 %a, 5
+  ret i64 %1
+}
+
+define i64 @sdiv64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: sdiv64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    div a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i64 10, %a
+  ret i64 %1
+}
+
+; Although this sdiv has two sexti32 operands, it shouldn't compile to divw on
+; RV64M as that wouldn't produce the correct result for e.g. INT_MIN/-1.
+
+define i64 @sdiv64_sext_operands(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: sdiv64_sext_operands:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    tail __divdi3@plt
+;
+; RV64IM-LABEL: sdiv64_sext_operands:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a0, a0
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    div a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %3 = sdiv i64 %1, %2
+  ret i64 %3
+}
+
+define i8 @sdiv8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: sdiv8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @sdiv8_constant(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, 103
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    sraiw a1, a0, 9
+; RV64IM-NEXT:    srliw a0, a0, 15
+; RV64IM-NEXT:    andi a0, a0, 1
+; RV64IM-NEXT:    addw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, 5
+  ret i8 %1
+}
+
+define i8 @sdiv8_pow2(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    srliw a1, a1, 12
+; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 27
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    srliw a1, a1, 12
+; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 27
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 %a, 8
+  ret i8 %1
+}
+
+define i8 @sdiv8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: sdiv8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a1, a0, 24
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i8 -10, %a
+  ret i8 %1
+}
+
+define i16 @sdiv16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: sdiv16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    divw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @sdiv16_constant(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_constant:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    lui a1, 6
+; RV64IM-NEXT:    addi a1, a1, 1639
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    srliw a1, a0, 31
+; RV64IM-NEXT:    sraiw a0, a0, 17
+; RV64IM-NEXT:    addw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, 5
+  ret i16 %1
+}
+
+define i16 @sdiv16_pow2(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    srliw a1, a1, 28
+; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 19
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    srliw a1, a1, 28
+; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    add a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 19
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 %a, 8
+  ret i16 %1
+}
+
+define i16 @sdiv16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: sdiv16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a1, a0, 16
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: sdiv16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    divw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = sdiv i16 -10, %a
+  ret i16 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
new file mode 100644
index 000000000000000..0ef17ca964db567
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll
@@ -0,0 +1,2564 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=RV64I,RV64-NOPOOL
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=RV64I,RV64I-POOL
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zba \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBA
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbb \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBB
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbs \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \
+; RUN:   -riscv-experimental-rv64-legal-i32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB
+
+; Materializing constants
+
+; TODO: It would be preferable if anyext constant returns were sign rather
+; than zero extended. See PR39092. For now, mark returns as explicitly signext
+; (this matches what Clang would generate for equivalent C/C++ anyway).
+
+define signext i32 @zero() nounwind {
+; RV64I-LABEL: zero:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: zero:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: zero:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 0
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: zero:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 0
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: zero:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 0
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 0
+}
+
+define signext i32 @pos_small() nounwind {
+; RV64I-LABEL: pos_small:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 2047
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_small:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 2047
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_small:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 2047
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_small:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 2047
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 2047
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2047
+}
+
+define signext i32 @neg_small() nounwind {
+; RV64I-LABEL: neg_small:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -2048
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_small:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -2048
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_small:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -2048
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_small:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -2048
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_small:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2048
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -2048
+}
+
+define signext i32 @pos_i32() nounwind {
+; RV64I-LABEL: pos_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 423811
+; RV64I-NEXT:    addiw a0, a0, -1297
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_i32:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 423811
+; RV64IZBA-NEXT:    addiw a0, a0, -1297
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_i32:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 423811
+; RV64IZBB-NEXT:    addiw a0, a0, -1297
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_i32:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 423811
+; RV64IZBS-NEXT:    addiw a0, a0, -1297
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 423811
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1297
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 1735928559
+}
+
+define signext i32 @neg_i32() nounwind {
+; RV64I-LABEL: neg_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 912092
+; RV64I-NEXT:    addiw a0, a0, -273
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_i32:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 912092
+; RV64IZBA-NEXT:    addiw a0, a0, -273
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_i32:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 912092
+; RV64IZBB-NEXT:    addiw a0, a0, -273
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_i32:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 912092
+; RV64IZBS-NEXT:    addiw a0, a0, -273
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 912092
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -559038737
+}
+
+define signext i32 @pos_i32_hi20_only() nounwind {
+; RV64I-LABEL: pos_i32_hi20_only:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_i32_hi20_only:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 16
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_i32_hi20_only:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_i32_hi20_only:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 65536 ; 0x10000
+}
+
+define signext i32 @neg_i32_hi20_only() nounwind {
+; RV64I-LABEL: neg_i32_hi20_only:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048560
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: neg_i32_hi20_only:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 1048560
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: neg_i32_hi20_only:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048560
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: neg_i32_hi20_only:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1048560
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: neg_i32_hi20_only:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048560
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 -65536 ; -0x10000
+}
+
+; This can be materialized with ADDI+SLLI, improving compressibility.
+
+define signext i32 @imm_left_shifted_addi() nounwind {
+; RV64I-LABEL: imm_left_shifted_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 32
+; RV64I-NEXT:    addiw a0, a0, -64
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 32
+; RV64IZBA-NEXT:    addiw a0, a0, -64
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 32
+; RV64IZBB-NEXT:    addiw a0, a0, -64
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 32
+; RV64IZBS-NEXT:    addiw a0, a0, -64
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 32
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -64
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 131008 ; 0x1FFC0
+}
+
+; This can be materialized with ADDI+SRLI, improving compressibility.
+
+define signext i32 @imm_right_shifted_addi() nounwind {
+; RV64I-LABEL: imm_right_shifted_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 524288
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524288
+; RV64IZBA-NEXT:    addiw a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524288
+; RV64IZBB-NEXT:    addiw a0, a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 524288
+; RV64IZBS-NEXT:    addiw a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2147483647 ; 0x7FFFFFFF
+}
+
+; This can be materialized with LUI+SRLI, improving compressibility.
+
+define signext i32 @imm_right_shifted_lui() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 56
+; RV64I-NEXT:    addiw a0, a0, 580
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 56
+; RV64IZBA-NEXT:    addiw a0, a0, 580
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 56
+; RV64IZBB-NEXT:    addiw a0, a0, 580
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 56
+; RV64IZBS-NEXT:    addiw a0, a0, 580
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 56
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 580
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 229956 ; 0x38244
+}
+
+define i64 @imm64_1() nounwind {
+; RV64I-LABEL: imm64_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 31
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 31
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147483648 ; 0x8000_0000
+}
+
+define i64 @imm64_2() nounwind {
+; RV64I-LABEL: imm64_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    srli a0, a0, 32
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    srli a0, a0, 32
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -1
+; RV64IZBS-NEXT:    srli a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294967295 ; 0xFFFF_FFFF
+}
+
+define i64 @imm64_3() nounwind {
+; RV64I-LABEL: imm64_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 32
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 32
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 32
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294967296 ; 0x1_0000_0000
+}
+
+define i64 @imm64_4() nounwind {
+; RV64I-LABEL: imm64_4:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_4:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    slli a0, a0, 63
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_4:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    slli a0, a0, 63
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_4:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_4:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223372036854775808 ; 0x8000_0000_0000_0000
+}
+
+define i64 @imm64_5() nounwind {
+; RV64I-LABEL: imm64_5:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_5:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    slli a0, a0, 63
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_5:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    slli a0, a0, 63
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_5:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_5:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000
+}
+
+define i64 @imm64_6() nounwind {
+; RV64I-LABEL: imm64_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 9321
+; RV64I-NEXT:    addi a0, a0, -1329
+; RV64I-NEXT:    slli a0, a0, 35
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_6:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 9321
+; RV64IZBA-NEXT:    addi a0, a0, -1329
+; RV64IZBA-NEXT:    slli a0, a0, 35
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_6:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 9321
+; RV64IZBB-NEXT:    addi a0, a0, -1329
+; RV64IZBB-NEXT:    slli a0, a0, 35
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_6:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 9321
+; RV64IZBS-NEXT:    addi a0, a0, -1329
+; RV64IZBS-NEXT:    slli a0, a0, 35
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_6:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 9321
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1329
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
+}
+
+define i64 @imm64_7() nounwind {
+; RV64I-LABEL: imm64_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 7
+; RV64I-NEXT:    slli a0, a0, 36
+; RV64I-NEXT:    addi a0, a0, 11
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_7:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 7
+; RV64IZBA-NEXT:    slli a0, a0, 36
+; RV64IZBA-NEXT:    addi a0, a0, 11
+; RV64IZBA-NEXT:    slli a0, a0, 24
+; RV64IZBA-NEXT:    addi a0, a0, 15
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_7:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 7
+; RV64IZBB-NEXT:    slli a0, a0, 36
+; RV64IZBB-NEXT:    addi a0, a0, 11
+; RV64IZBB-NEXT:    slli a0, a0, 24
+; RV64IZBB-NEXT:    addi a0, a0, 15
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_7:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 7
+; RV64IZBS-NEXT:    slli a0, a0, 36
+; RV64IZBS-NEXT:    addi a0, a0, 11
+; RV64IZBS-NEXT:    slli a0, a0, 24
+; RV64IZBS-NEXT:    addi a0, a0, 15
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_7:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 7
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 11
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 15
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F
+}
+
+; TODO: it can be preferable to put constants that are expensive to materialise
+; into the constant pool, especially for -Os.
+define i64 @imm64_8() nounwind {
+; RV64-NOPOOL-LABEL: imm64_8:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 583
+; RV64-NOPOOL-NEXT:    addiw a0, a0, -1875
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -947
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, 1511
+; RV64-NOPOOL-NEXT:    slli a0, a0, 13
+; RV64-NOPOOL-NEXT:    addi a0, a0, -272
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm64_8:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI17_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_8:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 596523
+; RV64IZBA-NEXT:    addi a0, a0, 965
+; RV64IZBA-NEXT:    slli.uw a0, a0, 13
+; RV64IZBA-NEXT:    addi a0, a0, -1347
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -529
+; RV64IZBA-NEXT:    slli a0, a0, 4
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_8:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 583
+; RV64IZBB-NEXT:    addiw a0, a0, -1875
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -947
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1511
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -272
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_8:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 583
+; RV64IZBS-NEXT:    addiw a0, a0, -1875
+; RV64IZBS-NEXT:    slli a0, a0, 14
+; RV64IZBS-NEXT:    addi a0, a0, -947
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1511
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -272
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_8:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 583
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1875
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -947
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1511
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -272
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0
+}
+
+define i64 @imm64_9() nounwind {
+; RV64I-LABEL: imm64_9:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_9:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_9:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_9:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_9:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1
+}
+
+; Various cases where extraneous ADDIs can be inserted where a (left shifted)
+; LUI suffices.
+
+define i64 @imm_left_shifted_lui_1() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 262145
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 262145
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 262145
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 262145
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147491840 ; 0x8000_2000
+}
+
+define i64 @imm_left_shifted_lui_2() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 262145
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 262145
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 262145
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 262145
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 262145
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 4294983680 ; 0x1_0000_4000
+}
+
+define i64 @imm_left_shifted_lui_3() nounwind {
+; RV64I-LABEL: imm_left_shifted_lui_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 4097
+; RV64I-NEXT:    slli a0, a0, 20
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_left_shifted_lui_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 4097
+; RV64IZBA-NEXT:    slli a0, a0, 20
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_left_shifted_lui_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 4097
+; RV64IZBB-NEXT:    slli a0, a0, 20
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_left_shifted_lui_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 4097
+; RV64IZBS-NEXT:    slli a0, a0, 20
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 17596481011712 ; 0x1001_0000_0000
+}
+
+; Various cases where extraneous ADDIs can be inserted where a (right shifted)
+; LUI suffices, or where multiple ADDIs can be used instead of a single LUI.
+
+define i64 @imm_right_shifted_lui_1() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 983056
+; RV64I-NEXT:    srli a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 983056
+; RV64IZBA-NEXT:    srli a0, a0, 16
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 983056
+; RV64IZBB-NEXT:    srli a0, a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 983056
+; RV64IZBS-NEXT:    srli a0, a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 983056
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 281474976706561 ; 0xFFFF_FFFF_F001
+}
+
+define i64 @imm_right_shifted_lui_2() nounwind {
+; RV64I-LABEL: imm_right_shifted_lui_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1044481
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    srli a0, a0, 24
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_right_shifted_lui_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 1044481
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    srli a0, a0, 24
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_right_shifted_lui_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1044481
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    srli a0, a0, 24
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_right_shifted_lui_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1044481
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    srli a0, a0, 24
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1044481
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1099511623681 ; 0xFF_FFFF_F001
+}
+
+; We can materialize the upper bits with a single (shifted) LUI, but that option
+; can be missed due to the lower bits, which aren't just 1s or just 0s.
+
+define i64 @imm_decoupled_lui_addi() nounwind {
+; RV64I-LABEL: imm_decoupled_lui_addi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 4097
+; RV64I-NEXT:    slli a0, a0, 20
+; RV64I-NEXT:    addi a0, a0, -3
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_decoupled_lui_addi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 4097
+; RV64IZBA-NEXT:    slli a0, a0, 20
+; RV64IZBA-NEXT:    addi a0, a0, -3
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_decoupled_lui_addi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 4097
+; RV64IZBB-NEXT:    slli a0, a0, 20
+; RV64IZBB-NEXT:    addi a0, a0, -3
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_decoupled_lui_addi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 4097
+; RV64IZBS-NEXT:    slli a0, a0, 20
+; RV64IZBS-NEXT:    addi a0, a0, -3
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_decoupled_lui_addi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 4097
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -3
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 17596481011709 ; 0x1000_FFFF_FFFD
+}
+
+; This constant can be materialized for RV64 with LUI+SRLI+XORI.
+
+define i64 @imm_end_xori_1() nounwind {
+; RV64I-LABEL: imm_end_xori_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 983040
+; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_end_xori_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 983040
+; RV64IZBA-NEXT:    srli a0, a0, 3
+; RV64IZBA-NEXT:    not a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_end_xori_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 983040
+; RV64IZBB-NEXT:    srli a0, a0, 3
+; RV64IZBB-NEXT:    not a0, a0
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_end_xori_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 983040
+; RV64IZBS-NEXT:    srli a0, a0, 3
+; RV64IZBS-NEXT:    not a0, a0
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_xori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 983040
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 3
+; RV64IXTHEADBB-NEXT:    not a0, a0
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
+}
+
+; This constant can be materialized for RV64 with ADDI+SLLI+ADDI+ADDI.
+
+define i64 @imm_end_2addi_1() nounwind {
+; RV64I-LABEL: imm_end_2addi_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -2047
+; RV64I-NEXT:    slli a0, a0, 39
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_end_2addi_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -2047
+; RV64IZBA-NEXT:    slli a0, a0, 39
+; RV64IZBA-NEXT:    addi a0, a0, -2048
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_end_2addi_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -2047
+; RV64IZBB-NEXT:    slli a0, a0, 39
+; RV64IZBB-NEXT:    addi a0, a0, -2048
+; RV64IZBB-NEXT:    addi a0, a0, -1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_end_2addi_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -2047
+; RV64IZBS-NEXT:    slli a0, a0, 39
+; RV64IZBS-NEXT:    addi a0, a0, -2048
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_end_2addi_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -2047
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 39
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
+}
+
+; This constant can be more efficiently materialized for RV64 if we use two
+; registers instead of one.
+
+define i64 @imm_2reg_1() nounwind {
+; RV64I-LABEL: imm_2reg_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 74565
+; RV64I-NEXT:    addiw a0, a0, 1656
+; RV64I-NEXT:    slli a1, a0, 57
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2reg_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 74565
+; RV64IZBA-NEXT:    addiw a0, a0, 1656
+; RV64IZBA-NEXT:    slli a1, a0, 57
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2reg_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 74565
+; RV64IZBB-NEXT:    addiw a0, a0, 1656
+; RV64IZBB-NEXT:    slli a1, a0, 57
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2reg_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 74565
+; RV64IZBS-NEXT:    addiw a0, a0, 1656
+; RV64IZBS-NEXT:    slli a1, a0, 57
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2reg_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 74565
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1656
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 57
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
+}
+
+; FIXME: This should use a single ADDI for the immediate.
+define void @imm_store_i16_neg1(ptr %p) nounwind {
+; RV64I-LABEL: imm_store_i16_neg1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sh a1, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_store_i16_neg1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a1, -1
+; RV64IZBA-NEXT:    sh a1, 0(a0)
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_store_i16_neg1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a1, -1
+; RV64IZBB-NEXT:    sh a1, 0(a0)
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_store_i16_neg1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a1, -1
+; RV64IZBS-NEXT:    sh a1, 0(a0)
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i16_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sh a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
+  store i16 -1, ptr %p
+  ret void
+}
+
+; FIXME: This should use a single ADDI for the immediate.
+define void @imm_store_i32_neg1(ptr %p) nounwind {
+; RV64I-LABEL: imm_store_i32_neg1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_store_i32_neg1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a1, -1
+; RV64IZBA-NEXT:    sw a1, 0(a0)
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_store_i32_neg1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a1, -1
+; RV64IZBB-NEXT:    sw a1, 0(a0)
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_store_i32_neg1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a1, -1
+; RV64IZBS-NEXT:    sw a1, 0(a0)
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i32_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, -1
+; RV64IXTHEADBB-NEXT:    sw a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
+  store i32 -1, ptr %p
+  ret void
+}
+
+define i64 @imm_5372288229() {
+; RV64I-LABEL: imm_5372288229:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 160
+; RV64I-NEXT:    addiw a0, a0, 437
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -795
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_5372288229:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 655797
+; RV64IZBA-NEXT:    slli.uw a0, a0, 1
+; RV64IZBA-NEXT:    addi a0, a0, -795
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_5372288229:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 160
+; RV64IZBB-NEXT:    addiw a0, a0, 437
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -795
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_5372288229:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 263018
+; RV64IZBS-NEXT:    addiw a0, a0, -795
+; RV64IZBS-NEXT:    bseti a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 160
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -795
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 5372288229
+}
+
+define i64 @imm_neg_5372288229() {
+; RV64I-LABEL: imm_neg_5372288229:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048416
+; RV64I-NEXT:    addiw a0, a0, -437
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, 795
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_5372288229:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_5372288229:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048416
+; RV64IZBB-NEXT:    addiw a0, a0, -437
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, 795
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_5372288229:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 785558
+; RV64IZBS-NEXT:    addiw a0, a0, 795
+; RV64IZBS-NEXT:    bclri a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5372288229:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048416
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -437
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 795
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5372288229
+}
+
+define i64 @imm_8953813715() {
+; RV64I-LABEL: imm_8953813715:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 267
+; RV64I-NEXT:    addiw a0, a0, -637
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -1325
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_8953813715:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 437198
+; RV64IZBA-NEXT:    addiw a0, a0, -265
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_8953813715:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 267
+; RV64IZBB-NEXT:    addiw a0, a0, -637
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -1325
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_8953813715:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 88838
+; RV64IZBS-NEXT:    addiw a0, a0, -1325
+; RV64IZBS-NEXT:    bseti a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 267
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1325
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 8953813715
+}
+
+define i64 @imm_neg_8953813715() {
+; RV64I-LABEL: imm_neg_8953813715:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048309
+; RV64I-NEXT:    addiw a0, a0, 637
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, 1325
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_8953813715:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_8953813715:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048309
+; RV64IZBB-NEXT:    addiw a0, a0, 637
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, 1325
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_8953813715:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 959738
+; RV64IZBS-NEXT:    addiw a0, a0, 1325
+; RV64IZBS-NEXT:    bclri a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8953813715:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048309
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 637
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1325
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -8953813715
+}
+
+define i64 @imm_16116864687() {
+; RV64I-LABEL: imm_16116864687:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 961
+; RV64I-NEXT:    addiw a0, a0, -1475
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1711
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_16116864687:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 437198
+; RV64IZBA-NEXT:    addiw a0, a0, -265
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_16116864687:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 961
+; RV64IZBB-NEXT:    addiw a0, a0, -1475
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1711
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_16116864687:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 961
+; RV64IZBS-NEXT:    addiw a0, a0, -1475
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1711
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 961
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1711
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 16116864687
+}
+
+define i64 @imm_neg_16116864687() {
+; RV64I-LABEL: imm_neg_16116864687:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1047615
+; RV64I-NEXT:    addiw a0, a0, 1475
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, -1711
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_16116864687:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 611378
+; RV64IZBA-NEXT:    addiw a0, a0, 265
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_16116864687:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1047615
+; RV64IZBB-NEXT:    addiw a0, a0, 1475
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -1711
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_16116864687:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1047615
+; RV64IZBS-NEXT:    addiw a0, a0, 1475
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, -1711
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_16116864687:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047615
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1475
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1711
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -16116864687
+}
+
+define i64 @imm_2344336315() {
+; RV64I-LABEL: imm_2344336315:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 143087
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2344336315:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 143087
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2344336315:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 143087
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2344336315:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 143087
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2344336315:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 143087
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2344336315 ; 0x8bbbbbbb
+}
+
+define i64 @imm_70370820078523() {
+; RV64-NOPOOL-LABEL: imm_70370820078523:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 256
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 31
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_70370820078523:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI37_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI37_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_70370820078523:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 256
+; RV64IZBA-NEXT:    addiw a0, a0, 31
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_70370820078523:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 256
+; RV64IZBB-NEXT:    addiw a0, a0, 31
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_70370820078523:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 46
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_70370820078523:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 256
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 70370820078523 ; 0x40007bbbbbbb
+}
+
+define i64 @imm_neg_9223372034778874949() {
+; RV64I-LABEL: imm_neg_9223372034778874949:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 506812
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223372034778874949:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 506812
+; RV64IZBA-NEXT:    addiw a0, a0, -1093
+; RV64IZBA-NEXT:    slli a1, a0, 63
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223372034778874949:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 506812
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a1, a0, 63
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223372034778874949:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 506812
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
+}
+
+define i64 @imm_neg_9223301666034697285() {
+; RV64-NOPOOL-LABEL: imm_neg_9223301666034697285:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 917505
+; RV64-NOPOOL-NEXT:    slli a0, a0, 8
+; RV64-NOPOOL-NEXT:    addi a0, a0, 31
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_neg_9223301666034697285:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI39_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI39_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223301666034697285:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 917505
+; RV64IZBA-NEXT:    slli a0, a0, 8
+; RV64IZBA-NEXT:    addi a0, a0, 31
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223301666034697285:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 917505
+; RV64IZBB-NEXT:    slli a0, a0, 8
+; RV64IZBB-NEXT:    addi a0, a0, 31
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223301666034697285:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 506812
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bseti a0, a0, 46
+; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223301666034697285:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917505
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 8
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
+}
+
+define i64 @imm_neg_2219066437() {
+; RV64I-LABEL: imm_neg_2219066437:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 913135
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2219066437:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 913135
+; RV64IZBA-NEXT:    slli a0, a0, 2
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2219066437:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 913135
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2219066437:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 913135
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2219066437:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 913135
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2219066437 ; 0xffffffff7bbbbbbb
+}
+
+define i64 @imm_neg_8798043653189() {
+; RV64I-LABEL: imm_neg_8798043653189:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 917475
+; RV64I-NEXT:    addiw a0, a0, -273
+; RV64I-NEXT:    slli a0, a0, 14
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_8798043653189:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 917475
+; RV64IZBA-NEXT:    addiw a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_8798043653189:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 917475
+; RV64IZBB-NEXT:    addiw a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_8798043653189:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 43
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8798043653189:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 917475
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
+}
+
+define i64 @imm_9223372034904144827() {
+; RV64I-LABEL: imm_9223372034904144827:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 572348
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_9223372034904144827:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 572348
+; RV64IZBA-NEXT:    addiw a0, a0, -1093
+; RV64IZBA-NEXT:    slli a1, a0, 63
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_9223372034904144827:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 572348
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a1, a0, 63
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_9223372034904144827:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 572348
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
+}
+
+define i64 @imm_neg_9223354442718100411() {
+; RV64-NOPOOL-LABEL: imm_neg_9223354442718100411:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 524287
+; RV64-NOPOOL-NEXT:    slli a0, a0, 6
+; RV64-NOPOOL-NEXT:    addi a0, a0, -29
+; RV64-NOPOOL-NEXT:    slli a0, a0, 12
+; RV64-NOPOOL-NEXT:    addi a0, a0, -273
+; RV64-NOPOOL-NEXT:    slli a0, a0, 14
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm_neg_9223354442718100411:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI43_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_9223354442718100411:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524287
+; RV64IZBA-NEXT:    slli a0, a0, 6
+; RV64IZBA-NEXT:    addi a0, a0, -29
+; RV64IZBA-NEXT:    slli a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -273
+; RV64IZBA-NEXT:    slli a0, a0, 14
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_9223354442718100411:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524287
+; RV64IZBB-NEXT:    slli a0, a0, 6
+; RV64IZBB-NEXT:    addi a0, a0, -29
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, -273
+; RV64IZBB-NEXT:    slli a0, a0, 14
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_9223354442718100411:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 572348
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    bclri a0, a0, 44
+; RV64IZBS-NEXT:    bclri a0, a0, 63
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223354442718100411:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524287
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 6
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -29
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
+}
+
+define i64 @imm_2863311530() {
+; RV64I-LABEL: imm_2863311530:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 349525
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2863311530:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    addiw a0, a0, 1365
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2863311530:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 349525
+; RV64IZBB-NEXT:    addiw a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2863311530:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 349525
+; RV64IZBS-NEXT:    addiw a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 349525
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2863311530 ; 0xaaaaaaaa
+}
+
+define i64 @imm_neg_2863311530() {
+; RV64I-LABEL: imm_neg_2863311530:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 699051
+; RV64I-NEXT:    addiw a0, a0, -1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2863311530:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 699051
+; RV64IZBA-NEXT:    addiw a0, a0, -1365
+; RV64IZBA-NEXT:    slli a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2863311530:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 699051
+; RV64IZBB-NEXT:    addiw a0, a0, -1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2863311530:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 699051
+; RV64IZBS-NEXT:    addiw a0, a0, -1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2863311530:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 699051
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2863311530 ; 0xffffffff55555556
+}
+
+define i64 @imm_2147486378() {
+; RV64I-LABEL: imm_2147486378:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    addi a0, a0, 1365
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_2147486378:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 31
+; RV64IZBA-NEXT:    addi a0, a0, 1365
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_2147486378:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 31
+; RV64IZBB-NEXT:    addi a0, a0, 1365
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_2147486378:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, 1365
+; RV64IZBS-NEXT:    bseti a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_2147486378:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 2147485013
+}
+
+define i64 @imm_neg_2147485013() {
+; RV64I-LABEL: imm_neg_2147485013:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 524288
+; RV64I-NEXT:    addi a0, a0, -1365
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_2147485013:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 524288
+; RV64IZBA-NEXT:    addi a0, a0, -1365
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_2147485013:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 524288
+; RV64IZBB-NEXT:    addi a0, a0, -1365
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_2147485013:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 524288
+; RV64IZBS-NEXT:    addi a0, a0, -1365
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2147485013:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 524288
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1365
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2147485013
+}
+
+define i64 @imm_12900924131259() {
+; RV64I-LABEL: imm_12900924131259:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 188
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    addi a0, a0, 1979
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900924131259:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 1979
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900924131259:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 188
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 24
+; RV64IZBB-NEXT:    addi a0, a0, 1979
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900924131259:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 188
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 24
+; RV64IZBS-NEXT:    addi a0, a0, 1979
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900924131259:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1979
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900924131259
+}
+
+define i64 @imm_50394234880() {
+; RV64I-LABEL: imm_50394234880:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 188
+; RV64I-NEXT:    addiw a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_50394234880:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    slli.uw a0, a0, 4
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_50394234880:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 188
+; RV64IZBB-NEXT:    addiw a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 16
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_50394234880:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 188
+; RV64IZBS-NEXT:    addiw a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 16
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_50394234880:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 188
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 16
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 50394234880
+}
+
+define i64 @imm_12900936431479() {
+; RV64I-LABEL: imm_12900936431479:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 192239
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    addi a0, a0, -1093
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1911
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900936431479:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768956
+; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 1911
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900936431479:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 192239
+; RV64IZBB-NEXT:    slli a0, a0, 2
+; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1911
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900936431479:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 192239
+; RV64IZBS-NEXT:    slli a0, a0, 2
+; RV64IZBS-NEXT:    addi a0, a0, -1093
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1911
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900936431479:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 192239
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900936431479
+}
+
+define i64 @imm_12900918536874() {
+; RV64I-LABEL: imm_12900918536874:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 384477
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900918536874:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    addi a0, a0, -1365
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, -1366
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900918536874:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 384477
+; RV64IZBB-NEXT:    addiw a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1365
+; RV64IZBB-NEXT:    slli a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900918536874:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 384477
+; RV64IZBS-NEXT:    addiw a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1365
+; RV64IZBS-NEXT:    slli a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900918536874:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 384477
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900918536874
+}
+
+define i64 @imm_12900925247761() {
+; RV64I-LABEL: imm_12900925247761:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 384478
+; RV64I-NEXT:    addiw a0, a0, -1911
+; RV64I-NEXT:    slli a0, a0, 13
+; RV64I-NEXT:    addi a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -1775
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12900925247761:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    addi a0, a0, 273
+; RV64IZBA-NEXT:    slli.uw a0, a0, 12
+; RV64IZBA-NEXT:    addi a0, a0, 273
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12900925247761:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 384478
+; RV64IZBB-NEXT:    addiw a0, a0, -1911
+; RV64IZBB-NEXT:    slli a0, a0, 13
+; RV64IZBB-NEXT:    addi a0, a0, -2048
+; RV64IZBB-NEXT:    addi a0, a0, -1775
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12900925247761:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 384478
+; RV64IZBS-NEXT:    addiw a0, a0, -1911
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -2048
+; RV64IZBS-NEXT:    addi a0, a0, -1775
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900925247761:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 384478
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1911
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1775
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12900925247761
+}
+
+define i64 @imm_7158272001() {
+; RV64I-LABEL: imm_7158272001:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 427
+; RV64I-NEXT:    addiw a0, a0, -1367
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_7158272001:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_7158272001:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 427
+; RV64IZBB-NEXT:    addiw a0, a0, -1367
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_7158272001:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 427
+; RV64IZBS-NEXT:    addiw a0, a0, -1367
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_7158272001:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 427
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1367
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 7158272001 ; 0x0000_0001_aaaa_9001
+}
+
+define i64 @imm_12884889601() {
+; RV64I-LABEL: imm_12884889601:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 768
+; RV64I-NEXT:    addiw a0, a0, -3
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_12884889601:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 349525
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_12884889601:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 768
+; RV64IZBB-NEXT:    addiw a0, a0, -3
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_12884889601:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 768
+; RV64IZBS-NEXT:    addiw a0, a0, -3
+; RV64IZBS-NEXT:    slli a0, a0, 12
+; RV64IZBS-NEXT:    addi a0, a0, 1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_12884889601:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 768
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -3
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 12884889601 ; 0x0000_0002_ffff_d001
+}
+
+define i64 @imm_neg_3435982847() {
+; RV64I-LABEL: imm_neg_3435982847:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048371
+; RV64I-NEXT:    addiw a0, a0, 817
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_3435982847:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_3435982847:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048371
+; RV64IZBB-NEXT:    addiw a0, a0, 817
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_3435982847:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 734001
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_3435982847:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048371
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 817
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -3435982847 ; 0xffff_ffff_3333_1001
+}
+
+define i64 @imm_neg_5726842879() {
+; RV64I-LABEL: imm_neg_5726842879:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048235
+; RV64I-NEXT:    addiw a0, a0, -1419
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_5726842879:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768945
+; RV64IZBA-NEXT:    sh2add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_5726842879:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048235
+; RV64IZBB-NEXT:    addiw a0, a0, -1419
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_5726842879:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 698997
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 32
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5726842879:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048235
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1419
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
+}
+
+define i64 @imm_neg_10307948543() {
+; RV64I-LABEL: imm_neg_10307948543:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1047962
+; RV64I-NEXT:    addiw a0, a0, -1645
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_neg_10307948543:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 768955
+; RV64IZBA-NEXT:    sh3add a0, a0, a0
+; RV64IZBA-NEXT:    addi a0, a0, 1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_neg_10307948543:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1047962
+; RV64IZBB-NEXT:    addiw a0, a0, -1645
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    addi a0, a0, 1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_neg_10307948543:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 629139
+; RV64IZBS-NEXT:    addiw a0, a0, 1
+; RV64IZBS-NEXT:    bclri a0, a0, 33
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_10307948543:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1047962
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1645
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -10307948543 ; 0xffff_fffd_9999_3001
+}
+
+define i64 @li_rori_1() {
+; RV64I-LABEL: li_rori_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -17
+; RV64I-NEXT:    slli a0, a0, 43
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -17
+; RV64IZBA-NEXT:    slli a0, a0, 43
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -18
+; RV64IZBB-NEXT:    rori a0, a0, 21
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -17
+; RV64IZBS-NEXT:    slli a0, a0, 43
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 21
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -149533581377537
+}
+
+define i64 @li_rori_2() {
+; RV64I-LABEL: li_rori_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -5
+; RV64I-NEXT:    slli a0, a0, 60
+; RV64I-NEXT:    addi a0, a0, -6
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_2:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -5
+; RV64IZBA-NEXT:    slli a0, a0, 60
+; RV64IZBA-NEXT:    addi a0, a0, -6
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_2:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -86
+; RV64IZBB-NEXT:    rori a0, a0, 4
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_2:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -5
+; RV64IZBS-NEXT:    slli a0, a0, 60
+; RV64IZBS-NEXT:    addi a0, a0, -6
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_2:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -86
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 4
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -5764607523034234886
+}
+
+define i64 @li_rori_3() {
+; RV64I-LABEL: li_rori_3:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, -17
+; RV64I-NEXT:    slli a0, a0, 27
+; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: li_rori_3:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, -17
+; RV64IZBA-NEXT:    slli a0, a0, 27
+; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: li_rori_3:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, -18
+; RV64IZBB-NEXT:    rori a0, a0, 37
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: li_rori_3:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a0, -17
+; RV64IZBS-NEXT:    slli a0, a0, 27
+; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_3:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, -18
+; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 37
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2281701377
+}
+
+; This used to assert when compiled with Zba.
+define i64 @PR54812() {
+; RV64I-LABEL: PR54812:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 1048447
+; RV64I-NEXT:    addiw a0, a0, 1407
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: PR54812:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 872917
+; RV64IZBA-NEXT:    sh1add a0, a0, a0
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: PR54812:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 1048447
+; RV64IZBB-NEXT:    addiw a0, a0, 1407
+; RV64IZBB-NEXT:    slli a0, a0, 12
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: PR54812:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 1045887
+; RV64IZBS-NEXT:    bclri a0, a0, 31
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: PR54812:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 1048447
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1407
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 -2158497792
+}
+
+define signext i32 @pos_2048() nounwind {
+; RV64I-LABEL: pos_2048:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: pos_2048:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a0, 1
+; RV64IZBA-NEXT:    slli a0, a0, 11
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: pos_2048:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a0, 1
+; RV64IZBB-NEXT:    slli a0, a0, 11
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: pos_2048:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    bseti a0, zero, 11
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: pos_2048:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a0, 1
+; RV64IXTHEADBB-NEXT:    slli a0, a0, 11
+; RV64IXTHEADBB-NEXT:    ret
+  ret i32 2048
+}
+
+define i64 @imm64_same_lo_hi() nounwind {
+; RV64I-LABEL: imm64_same_lo_hi:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 65793
+; RV64I-NEXT:    addiw a0, a0, 16
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_same_lo_hi:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 65793
+; RV64IZBA-NEXT:    addiw a0, a0, 16
+; RV64IZBA-NEXT:    slli a1, a0, 32
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_same_lo_hi:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 65793
+; RV64IZBB-NEXT:    addiw a0, a0, 16
+; RV64IZBB-NEXT:    slli a1, a0, 32
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_same_lo_hi:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 65793
+; RV64IZBS-NEXT:    addiw a0, a0, 16
+; RV64IZBS-NEXT:    slli a1, a0, 32
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_same_lo_hi:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 65793
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 16
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1157442765409226768 ; 0x0101010101010101
+}
+
+; Same as above, but with optsize. Make sure we use the constant pool on RV64.
+define i64 @imm64_same_lo_hi_optsize() nounwind optsize {
+; RV64-NOPOOL-LABEL: imm64_same_lo_hi_optsize:
+; RV64-NOPOOL:       # %bb.0:
+; RV64-NOPOOL-NEXT:    lui a0, 65793
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 16
+; RV64-NOPOOL-NEXT:    slli a1, a0, 32
+; RV64-NOPOOL-NEXT:    add a0, a0, a1
+; RV64-NOPOOL-NEXT:    ret
+;
+; RV64I-POOL-LABEL: imm64_same_lo_hi_optsize:
+; RV64I-POOL:       # %bb.0:
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
+; RV64I-POOL-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    lui a0, 65793
+; RV64IZBA-NEXT:    addiw a0, a0, 16
+; RV64IZBA-NEXT:    slli a1, a0, 32
+; RV64IZBA-NEXT:    add a0, a0, a1
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    lui a0, 65793
+; RV64IZBB-NEXT:    addiw a0, a0, 16
+; RV64IZBB-NEXT:    slli a1, a0, 32
+; RV64IZBB-NEXT:    add a0, a0, a1
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm64_same_lo_hi_optsize:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    lui a0, 65793
+; RV64IZBS-NEXT:    addiw a0, a0, 16
+; RV64IZBS-NEXT:    slli a1, a0, 32
+; RV64IZBS-NEXT:    add a0, a0, a1
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm64_same_lo_hi_optsize:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    lui a0, 65793
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 16
+; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
+; RV64IXTHEADBB-NEXT:    add a0, a0, a1
+; RV64IXTHEADBB-NEXT:    ret
+  ret i64 1157442765409226768 ; 0x0101010101010101
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
new file mode 100644
index 000000000000000..456a880891f7309
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+
+; Check indexed and unindexed, sext, zext and anyext loads
+
+define void @lb(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lb:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lb a2, 1(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = sext i8 %2 to i32
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i8, ptr %a
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lbu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lbu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a0, 1(a0)
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = zext i8 %2 to i32
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lh(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lh:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lh a2, 2(a0)
+; RV64I-NEXT:    lh zero, 0(a0)
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 1
+  %2 = load i16, ptr %1
+  %3 = sext i16 %2 to i32
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i16, ptr %a
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lhu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lhu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lhu a0, 2(a0)
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 1
+  %2 = load i16, ptr %1
+  %3 = zext i16 %2 to i32
+  store i32 %3, ptr %b
+  ret void
+}
+
+define void @lw(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw zero, 0(a0)
+; RV64I-NEXT:    sd a2, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i64 1
+  %2 = load i32, ptr %1
+  %3 = sext i32 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i32, ptr %a
+  store i64 %3, ptr %b
+  ret void
+}
+
+define void @lwu(ptr %a, ptr %b) nounwind {
+; RV64I-LABEL: lwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lwu a0, 4(a0)
+; RV64I-NEXT:    sd a0, 0(a1)
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i64 1
+  %2 = load i32, ptr %1
+  %3 = zext i32 %2 to i64
+  store i64 %3, ptr %b
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
new file mode 100644
index 000000000000000..76ab0e7d5810e70
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+
+; Check indexed and unindexed, sext, zext and anyext loads
+
+define dso_local i64 @lb(ptr %a) nounwind {
+; RV64I-LABEL: lb:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lb a1, 1(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 1
+  %2 = load i8, ptr %1
+  %3 = sext i8 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i8, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lh(ptr %a) nounwind {
+; RV64I-LABEL: lh:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lh a1, 4(a0)
+; RV64I-NEXT:    lh zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 2
+  %2 = load i16, ptr %1
+  %3 = sext i16 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i16, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lw(ptr %a) nounwind {
+; RV64I-LABEL: lw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    lw zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 3
+  %2 = load i32, ptr %1
+  %3 = sext i32 %2 to i64
+  ; the unused load will produce an anyext for selection
+  %4 = load volatile i32, ptr %a
+  ret i64 %3
+}
+
+define dso_local i64 @lbu(ptr %a) nounwind {
+; RV64I-LABEL: lbu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 4(a0)
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i8, ptr %a, i32 4
+  %2 = load i8, ptr %1
+  %3 = zext i8 %2 to i64
+  %4 = load volatile i8, ptr %a
+  %5 = zext i8 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+define dso_local i64 @lhu(ptr %a) nounwind {
+; RV64I-LABEL: lhu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lhu a1, 10(a0)
+; RV64I-NEXT:    lhu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i16, ptr %a, i32 5
+  %2 = load i16, ptr %1
+  %3 = zext i16 %2 to i64
+  %4 = load volatile i16, ptr %a
+  %5 = zext i16 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+define dso_local i64 @lwu(ptr %a) nounwind {
+; RV64I-LABEL: lwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lwu a1, 24(a0)
+; RV64I-NEXT:    lwu a0, 0(a0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %1 = getelementptr i32, ptr %a, i32 6
+  %2 = load i32, ptr %1
+  %3 = zext i32 %2 to i64
+  %4 = load volatile i32, ptr %a
+  %5 = zext i32 %4 to i64
+  %6 = add i64 %3, %5
+  ret i64 %6
+}
+
+; 64-bit loads and stores
+
+define dso_local i64 @ld(ptr %a) nounwind {
+; RV64I-LABEL: ld:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ld a1, 80(a0)
+; RV64I-NEXT:    ld zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = getelementptr i64, ptr %a, i32 10
+  %2 = load i64, ptr %1
+  %3 = load volatile i64, ptr %a
+  ret i64 %2
+}
+
+define dso_local void @sd(ptr %a, i64 %b) nounwind {
+; RV64I-LABEL: sd:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sd a1, 0(a0)
+; RV64I-NEXT:    sd a1, 88(a0)
+; RV64I-NEXT:    ret
+  store i64 %b, ptr %a
+  %1 = getelementptr i64, ptr %a, i32 11
+  store i64 %b, ptr %1
+  ret void
+}
+
+; Check load and store to an i1 location
+define dso_local i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
+; RV64I-LABEL: load_sext_zext_anyext_i1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 1(a0)
+; RV64I-NEXT:    lbu a2, 2(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    sub a0, a2, a1
+; RV64I-NEXT:    ret
+  ; sextload i1
+  %1 = getelementptr i1, ptr %a, i32 1
+  %2 = load i1, ptr %1
+  %3 = sext i1 %2 to i64
+  ; zextload i1
+  %4 = getelementptr i1, ptr %a, i32 2
+  %5 = load i1, ptr %4
+  %6 = zext i1 %5 to i64
+  %7 = add i64 %3, %6
+  ; extload i1 (anyext). Produced as the load is unused.
+  %8 = load volatile i1, ptr %a
+  ret i64 %7
+}
+
+define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
+; RV64I-LABEL: load_sext_zext_anyext_i1_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lbu a1, 1(a0)
+; RV64I-NEXT:    lbu a2, 2(a0)
+; RV64I-NEXT:    lbu zero, 0(a0)
+; RV64I-NEXT:    subw a0, a2, a1
+; RV64I-NEXT:    ret
+  ; sextload i1
+  %1 = getelementptr i1, ptr %a, i32 1
+  %2 = load i1, ptr %1
+  %3 = sext i1 %2 to i16
+  ; zextload i1
+  %4 = getelementptr i1, ptr %a, i32 2
+  %5 = load i1, ptr %4
+  %6 = zext i1 %5 to i16
+  %7 = add i16 %3, %6
+  ; extload i1 (anyext). Produced as the load is unused.
+  %8 = load volatile i1, ptr %a
+  ret i16 %7
+}
+
+; Check load and store to a global
+@G = dso_local global i64 0
+
+define dso_local i64 @ld_sd_global(i64 %a) nounwind {
+; RV64I-LABEL: ld_sd_global:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, %hi(G)
+; RV64I-NEXT:    ld a1, %lo(G)(a2)
+; RV64I-NEXT:    addi a3, a2, %lo(G)
+; RV64I-NEXT:    sd a0, %lo(G)(a2)
+; RV64I-NEXT:    ld zero, 72(a3)
+; RV64I-NEXT:    sd a0, 72(a3)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+  %1 = load volatile i64, ptr @G
+  store i64 %a, ptr @G
+  %2 = getelementptr i64, ptr @G, i64 9
+  %3 = load volatile i64, ptr %2
+  store i64 %a, ptr %2
+  ret i64 %1
+}
+
+define i64 @lw_near_local(ptr %a)  {
+; RV64I-LABEL: lw_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    ld a0, 9(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+define void @st_near_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 2047
+; RV64I-NEXT:    sd a1, 9(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  store i64 %b, ptr %1
+  ret void
+}
+
+define i64 @lw_sw_near_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_near_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a2, a0, 2047
+; RV64I-NEXT:    ld a0, 9(a2)
+; RV64I-NEXT:    sd a1, 9(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 257
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
+
+define i64 @lw_far_local(ptr %a)  {
+; RV64I-LABEL: lw_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, -8(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+define void @st_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 8
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    sd a1, -8(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  store i64 %b, ptr %1
+  ret void
+}
+
+define i64 @lw_sw_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 8
+; RV64I-NEXT:    add a2, a0, a2
+; RV64I-NEXT:    ld a0, -8(a2)
+; RV64I-NEXT:    sd a1, -8(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 4095
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the load offset. The sign extend of the
+; addiw is required.
+define i64 @lw_really_far_local(ptr %a)  {
+; RV64I-LABEL: lw_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    addiw a1, a1, -2048
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  %2 = load volatile i64, ptr %1
+  ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the store offset. The sign extend of
+; the addiw is required.
+define void @st_really_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: st_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a2, a2, -2048
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    sd a1, 0(a0)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  store i64 %b, ptr %1
+  ret void
+}
+
+; Make sure we don't fold the addiw into the load/store offset. The sign extend
+; of the addiw is required.
+define i64 @lw_sw_really_far_local(ptr %a, i64 %b)  {
+; RV64I-LABEL: lw_sw_really_far_local:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 524288
+; RV64I-NEXT:    addiw a2, a2, -2048
+; RV64I-NEXT:    add a2, a0, a2
+; RV64I-NEXT:    ld a0, 0(a2)
+; RV64I-NEXT:    sd a1, 0(a2)
+; RV64I-NEXT:    ret
+  %1 = getelementptr inbounds i64, ptr %a, i64 268435200
+  %2 = load volatile i64, ptr %1
+  store i64 %b, ptr %1
+  ret i64 %2
+}
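
The three *_really_far_local tests above (lw_really_far_local, st_really_far_local, lw_sw_really_far_local) all hinge on the same arithmetic. A small C sketch of what lui/addiw/addi compute on RV64 (hypothetical helper, only to illustrate the constants; the checked assembly above is authoritative):

  #include <stdint.h>

  int check_really_far_offset(void) {
    int64_t needed = 268435200ll * 8;                 /* 2147481600 = 0x7fff_f800 */
    int64_t lui    = INT32_MIN;                       /* lui a, 524288 sign-extends 0x8000_0000 */
    int64_t addiw  = (int32_t)(uint32_t)(lui - 2048); /* 32-bit add, then sext: 0x0000_0000_7fff_f800 */
    int64_t addi   = lui - 2048;                      /* 64-bit add:            0xffff_ffff_7fff_f800 */
    /* Only the sign-extending 32-bit add yields the positive offset, so the
       -2048 cannot be folded into the ld/sd immediate. */
    return addiw == needed && addi != needed;         /* 1 */
  }
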
+
+%struct.quux = type { i32, [0 x i8] }
+
+; Make sure we don't remove the addi and fold the C from
+; (add (addi FrameIndex, C), X) into the store address.
+; FrameIndex cannot be the operand of an ADD. We must keep the ADDI.
+define void @addi_fold_crash(i64 %arg) nounwind {
+; RV64I-LABEL: addi_fold_crash:
+; RV64I:       # %bb.0: # %bb
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi a1, sp, 4
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    sb zero, 0(a0)
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    call snork@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+bb:
+  %tmp = alloca %struct.quux, align 8
+  %tmp1 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1
+  %tmp2 = getelementptr inbounds %struct.quux, ptr %tmp, i64 0, i32 1, i64 %arg
+  store i8 0, ptr %tmp2, align 1
+  call void @snork(ptr %tmp1)
+  ret void
+}
+
+declare void @snork(ptr)
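
For readers of addi_fold_crash above, this is roughly the C shape that produces the (add (addi FrameIndex, C), X) address. It is a hypothetical reconstruction (the IR is the actual reproducer), and the out-of-bounds store only mirrors the zero-length array in %struct.quux:

  struct quux { int x; char tail[]; };   /* matches { i32, [0 x i8] } */
  extern void snork(char *);

  void addi_fold_crash(long arg) {
    struct quux tmp;                     /* stack object -> FrameIndex      */
    tmp.tail[arg] = 0;                   /* store to (FrameIndex + 4) + arg */
    snork(tmp.tail);                     /* FrameIndex + 4 also escapes     */
  }
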
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
new file mode 100644
index 000000000000000..11adbbdd245f1d0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=RV64IM %s
+
+define i32 @urem(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: urem:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @urem_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: urem_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i32 10, %a
+  ret i32 %1
+}
+
+define i32 @srem(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: srem:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, %b
+  ret i32 %1
+}
+
+define i32 @srem_pow2(i32 %a) nounwind {
+; RV64I-LABEL: srem_pow2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_pow2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    andi a1, a1, -8
+; RV64IM-NEXT:    subw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, 8
+  ret i32 %1
+}
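
The srem_pow2 sequence above is the usual signed-remainder-by-power-of-two expansion. A scalar C model of it (illustrative only, assuming the usual two's-complement int):

  int srem_pow2_model(int x) {
    int bias = (x < 0) ? 7 : 0;      /* the sraiw 31 / srliw 29 pair computes this branchlessly */
    return x - ((x + bias) & ~7);    /* equals x % 8 for every int x */
  }
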
+
+define i32 @srem_pow2_2(i32 %a) nounwind {
+; RV64I-LABEL: srem_pow2_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_pow2_2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    lui a2, 1048560
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    subw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i32 %a, 65536
+  ret i32 %1
+}
+
+define i32 @srem_constant_lhs(i32 %a) nounwind {
+; RV64I-LABEL: srem_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sext.w a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i32 -10, %a
+  ret i32 %1
+}
+
+define i64 @urem64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: urem64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __umoddi3@plt
+;
+; RV64IM-LABEL: urem64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @urem64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: urem64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    tail __umoddi3@plt
+;
+; RV64IM-LABEL: urem64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remu a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i64 10, %a
+  ret i64 %1
+}
+
+define i64 @srem64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: srem64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    tail __moddi3@plt
+;
+; RV64IM-LABEL: srem64:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    rem a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @srem64_constant_lhs(i64 %a) nounwind {
+; RV64I-LABEL: srem64_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    tail __moddi3@plt
+;
+; RV64IM-LABEL: srem64_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    rem a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i64 -10, %a
+  ret i64 %1
+}
+
+define i8 @urem8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: urem8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a1, a1, 255
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @urem8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: urem8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    andi a1, a0, 255
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    andi a0, a0, 255
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i8 10, %a
+  ret i8 %1
+}
+
+
+define i8 @srem8(i8 %a, i8 %b) nounwind {
+; RV64I-LABEL: srem8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 24
+; RV64I-NEXT:    sraiw a1, a1, 24
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 24
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem8:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 24
+; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i8 %a, %b
+  ret i8 %1
+}
+
+define i8 @srem8_constant_lhs(i8 %a) nounwind {
+; RV64I-LABEL: srem8_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a1, a0, 24
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem8_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 24
+; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i8 -10, %a
+  ret i8 %1
+}
+
+
+define i16 @urem16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: urem16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    lui a2, 16
+; RV64IM-NEXT:    addi a2, a2, -1
+; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    and a0, a0, a2
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = urem i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @urem16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: urem16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a1, a0, 48
+; RV64I-NEXT:    li a0, 10
+; RV64I-NEXT:    call __umoddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: urem16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 48
+; RV64IM-NEXT:    srli a0, a0, 48
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    remuw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = urem i16 10, %a
+  ret i16 %1
+}
+
+define i16 @srem16(i16 %a, i16 %b) nounwind {
+; RV64I-LABEL: srem16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    sraiw a1, a1, 16
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem16:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a1, 16
+; RV64IM-NEXT:    sraiw a1, a1, 16
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    remw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = srem i16 %a, %b
+  ret i16 %1
+}
+
+define i16 @srem16_constant_lhs(i16 %a) nounwind {
+; RV64I-LABEL: srem16_constant_lhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a1, a0, 16
+; RV64I-NEXT:    li a0, -10
+; RV64I-NEXT:    call __moddi3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: srem16_constant_lhs:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a0, a0, 16
+; RV64IM-NEXT:    sraiw a0, a0, 16
+; RV64IM-NEXT:    li a1, -10
+; RV64IM-NEXT:    remw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = srem i16 -10, %a
+  ret i16 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
new file mode 100644
index 000000000000000..bb2f2b73d4a0c7d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -0,0 +1,902 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    j .LBB1_3
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:  .LBB1_3: # %cond.end
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    li a1, 31
+; RV64XTHEADBB-NEXT:    subw a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  %2 = sub i32 31, %1
+  ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    li s0, 32
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    beqz a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a1, a0, 24
+; RV64I-NEXT:  .LBB2_2: # %cond.end
+; RV64I-NEXT:    subw a0, s0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: log2_ceil_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    li a1, 32
+; RV64XTHEADBB-NEXT:    subw a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = sub i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %3 = sub i32 32, %2
+  ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    or a0, s0, a0
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    xori a0, a0, 31
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findLastSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
+; RV64XTHEADBB-NEXT:    addi a1, a1, -32
+; RV64XTHEADBB-NEXT:    xori a1, a1, 31
+; RV64XTHEADBB-NEXT:    snez a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
+; RV64XTHEADBB-NEXT:    or a0, a0, a1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+  %2 = xor i32 31, %1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 -1, i32 %2
+  ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    addi a0, a0, -32
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ctlz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    negw a1, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB6_2:
+; RV64XTHEADBB-NEXT:    li a0, 32
+; RV64XTHEADBB-NEXT:    ret
+; RV64ZBB-LABEL: cttz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    negw a1, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: findFirstSet_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    mv s0, a0
+; RV64XTHEADBB-NEXT:    negw a0, a0
+; RV64XTHEADBB-NEXT:    and a0, s0, a0
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    snez a1, s0
+; RV64XTHEADBB-NEXT:    addi a1, a1, -1
+; RV64XTHEADBB-NEXT:    or a0, a1, a0
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = icmp eq i32 %a, 0
+  %3 = select i1 %2, i32 -1, i32 %1
+  ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    seqz a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: ffs_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    addi sp, sp, -16
+; RV64XTHEADBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64XTHEADBB-NEXT:    mv s0, a0
+; RV64XTHEADBB-NEXT:    negw a0, a0
+; RV64XTHEADBB-NEXT:    and a0, s0, a0
+; RV64XTHEADBB-NEXT:    lui a1, 30667
+; RV64XTHEADBB-NEXT:    addiw a1, a1, 1329
+; RV64XTHEADBB-NEXT:    call __muldi3@plt
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 27
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64XTHEADBB-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64XTHEADBB-NEXT:    add a0, a1, a0
+; RV64XTHEADBB-NEXT:    lbu a0, 0(a0)
+; RV64XTHEADBB-NEXT:    addi a0, a0, 1
+; RV64XTHEADBB-NEXT:    seqz a1, s0
+; RV64XTHEADBB-NEXT:    addi a1, a1, -1
+; RV64XTHEADBB-NEXT:    and a0, a1, a0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 32
+; RV64XTHEADBB-NEXT:    srli a0, a0, 32
+; RV64XTHEADBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64XTHEADBB-NEXT:    addi sp, sp, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = add i32 %1, 1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 0, i32 %2
+  ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB10_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT:    call __muldi3@plt
+; RV64I-NEXT:    srli a0, a0, 58
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: cttz_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
+; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT:    addi a1, a0, -1
+; RV64XTHEADBB-NEXT:    not a0, a0
+; RV64XTHEADBB-NEXT:    and a0, a0, a1
+; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
+; RV64XTHEADBB-NEXT:    li a1, 64
+; RV64XTHEADBB-NEXT:    sub a0, a1, a0
+; RV64XTHEADBB-NEXT:    ret
+; RV64XTHEADBB-NEXT:  .LBB10_2:
+; RV64XTHEADBB-NEXT:    li a0, 64
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sextb_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: no_sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 17
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 17
+; RV64XTHEADBB-NEXT:    sraiw a0, a0, 16
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 17
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: no_sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 49
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 49
+; RV64XTHEADBB-NEXT:    srai a0, a0, 48
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 49
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+define i64 @zext_bf_i64(i64 %a) nounwind {
+; RV64I-LABEL: zext_bf_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 47
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_bf_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 1
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
+define i64 @zext_i64_srliw(i64 %a) nounwind {
+; RV64I-LABEL: zext_i64_srliw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_i64_srliw:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 16
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slliw a2, a2, 8
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addi a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    and a3, a0, a3
+; RV64I-NEXT:    slli a3, a3, 8
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i32_nosext:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.revw a0, a0
+; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
+; RV64XTHEADBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %1, ptr %x
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    srli a5, a0, 8
+; RV64I-NEXT:    srliw a5, a5, 24
+; RV64I-NEXT:    slli a5, a5, 24
+; RV64I-NEXT:    or a3, a5, a3
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    and a4, a0, a4
+; RV64I-NEXT:    slli a4, a4, 24
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a3, a4, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 40
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: bswap_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.rev a0, a0
+; RV64XTHEADBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
new file mode 100644
index 000000000000000..8005ad60b8a1105
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
@@ -0,0 +1,1798 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB
+
+define i64 @slliuw(i64 %a) nounwind {
+; RV64I-LABEL: slliuw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 1
+; RV64ZBA-NEXT:    ret
+  %conv1 = shl i64 %a, 1
+  %shl = and i64 %conv1, 8589934590
+  ret i64 %shl
+}
+
+define i128 @slliuw_2(i32 signext %0, ptr %1) {
+; RV64I-LABEL: slliuw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 28
+; RV64I-NEXT:    add a1, a1, a0
+; RV64I-NEXT:    ld a0, 0(a1)
+; RV64I-NEXT:    ld a1, 8(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 4
+; RV64ZBA-NEXT:    add a1, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a1)
+; RV64ZBA-NEXT:    ld a1, 8(a1)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i128, ptr %1, i64 %3
+  %5 = load i128, ptr %4
+  ret i128 %5
+}
+
+define i64 @adduw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    add.uw a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %and = and i64 %b, 4294967295
+  %add = add i64 %and, %a
+  ret i64 %add
+}
+
+define signext i8 @adduw_2(i32 signext %0, ptr %1) {
+; RV64I-LABEL: adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lb a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lb a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i8, ptr %1, i64 %3
+  %5 = load i8, ptr %4
+  ret i8 %5
+}
+
+define i64 @zextw_i64(i64 %a) nounwind {
+; RV64I-LABEL: zextw_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zextw_i64:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  %and = and i64 %a, 4294967295
+  ret i64 %and
+}
+
+; This makes sure targetShrinkDemandedConstant changes the and immediate to
+; allow zext.w or slli+srli.
+define i64 @zextw_demandedbits_i64(i64 %0) {
+; RV64I-LABEL: zextw_demandedbits_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ori a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zextw_demandedbits_i64:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    ori a0, a0, 1
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  %2 = and i64 %0, 4294967294
+  %3 = or i64 %2, 1
+  ret i64 %3
+}
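
To spell out the demanded-bits argument from the comment above: bit 0 of the AND mask is forced to 1 by the following OR, so 0xfffffffe may as well be widened to 0xffffffff, which is exactly a zext.w (or slli+srli). A one-line C model of the equivalence (illustrative, not from the patch):

  #include <stdint.h>
  uint64_t zextw_demandedbits_model(uint64_t x) {
    return (x | 1) & 0xffffffffu;    /* same value as (x & 0xfffffffe) | 1 */
  }
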
+
+define signext i16 @sh1add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i16, ptr %1, i64 %0
+  %4 = load i16, ptr %3
+  ret i16 %4
+}
+
+define signext i32 @sh2add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i32, ptr %1, i64 %0
+  %4 = load i32, ptr %3
+  ret i32 %4
+}
+
+define i64 @sh3add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = getelementptr inbounds i64, ptr %1, i64 %0
+  %4 = load i64, ptr %3
+  ret i64 %4
+}
+
+define signext i16 @sh1adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh1adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i16, ptr %1, i64 %3
+  %5 = load i16, ptr %4
+  ret i16 %5
+}
+
+define i64 @sh1adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh1adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 1
+  %4 = and i64 %3, 8589934590
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+define signext i32 @sh2adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh2adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i32, ptr %1, i64 %3
+  %5 = load i32, ptr %4
+  ret i32 %5
+}
+
+define i64 @sh2adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh2adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 2
+  %4 = and i64 %3, 17179869180
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+define i64 @sh3adduw(i32 signext %0, ptr %1) {
+; RV64I-LABEL: sh3adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = zext i32 %0 to i64
+  %4 = getelementptr inbounds i64, ptr %1, i64 %3
+  %5 = load i64, ptr %4
+  ret i64 %5
+}
+
+define i64 @sh3adduw_2(i64 %0, i64 %1) {
+; RV64I-LABEL: sh3adduw_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_2:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %3 = shl i64 %0, 3
+  %4 = and i64 %3, 34359738360
+  %5 = add i64 %4, %1
+  ret i64 %5
+}
+
+; Type legalization inserts a sext_inreg after the first add. That add will be
+; selected as sh2add which does not sign extend. SimplifyDemandedBits is unable
+; to remove the sext_inreg because it has multiple uses. The ashr will use the
+; sext_inreg to become sraiw. This leaves the sext_inreg only used by the shl.
+; If the shl is selected as sllw, we don't need the sext_inreg.
+define i64 @sh2add_extra_sext(i32 %x, i32 %y, i32 %z) {
+; RV64I-LABEL: sh2add_extra_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    sllw a1, a2, a1
+; RV64I-NEXT:    sraiw a0, a0, 2
+; RV64I-NEXT:    mul a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2add_extra_sext:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a0, a0, 2
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    zext.w a1, a0
+; RV64ZBA-NEXT:    sllw a1, a2, a1
+; RV64ZBA-NEXT:    sraiw a0, a0, 2
+; RV64ZBA-NEXT:    mul a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %a = shl i32 %x, 2
+  %b = add i32 %a, %y
+  %c = shl i32 %z, %b
+  %d = ashr i32 %b, 2
+  %e = sext i32 %c to i64
+  %f = sext i32 %d to i64
+  %g = mul i64 %e, %f
+  ret i64 %g
+}
+
+define i64 @addmul6(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 6
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 6
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul10(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul10:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 10
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul10:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 10
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul12(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul12:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 12
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul12:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 12
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul18(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul18:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 18
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul18:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 18
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul20(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul20:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 20
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul20:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 20
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul24(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul24:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 24
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul24:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 24
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul36(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul36:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 36
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul36:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 36
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul40(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul40:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 40
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul40:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 40
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul72(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul72:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 72
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul72:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 72
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @mul96(i64 %a) {
+; RV64I-LABEL: mul96:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 96
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul96:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 96
+  ret i64 %c
+}
+
+define i64 @mul160(i64 %a) {
+; RV64I-LABEL: mul160:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 160
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul160:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 160
+  ret i64 %c
+}
+
+define i64 @mul288(i64 %a) {
+; RV64I-LABEL: mul288:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 288
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul288:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 288
+  ret i64 %c
+}
+
+define i64 @zext_mul96(i32 signext %a) {
+; RV64I-LABEL: zext_mul96:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul96:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 96
+  ret i64 %c
+}
+
+define i64 @zext_mul160(i32 signext %a) {
+; RV64I-LABEL: zext_mul160:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul160:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 160
+  ret i64 %c
+}
+
+define i64 @zext_mul288(i32 signext %a) {
+; RV64I-LABEL: zext_mul288:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 9
+; RV64I-NEXT:    slli a1, a1, 37
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul288:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 5
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 288
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul12884901888(i32 signext %a) {
+; RV64I-LABEL: zext_mul12884901888:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul12884901888:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 12884901888
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul21474836480(i32 signext %a) {
+; RV64I-LABEL: zext_mul21474836480:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul21474836480:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 21474836480
+  ret i64 %c
+}
+
+; We can't use slli.uw because the shift amount is more than 31.
+; FIXME: The zext.w is unneeded.
+define i64 @zext_mul38654705664(i32 signext %a) {
+; RV64I-LABEL: zext_mul38654705664:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    li a1, 9
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: zext_mul38654705664:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    andi a0, a0, -1
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 32
+; RV64ZBA-NEXT:    ret
+  %b = zext i32 %a to i64
+  %c = mul i64 %b, 38654705664
+  ret i64 %c
+}
+
+define i64 @sh1add_imm(i64 %0) {
+; CHECK-LABEL: sh1add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, 5
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 1
+  %b = add i64 %a, 5
+  ret i64 %b
+}
+
+define i64 @sh2add_imm(i64 %0) {
+; CHECK-LABEL: sh2add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -6
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 2
+  %b = add i64 %a, -6
+  ret i64 %b
+}
+
+define i64 @sh3add_imm(i64 %0) {
+; CHECK-LABEL: sh3add_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, 7
+; CHECK-NEXT:    ret
+  %a = shl i64 %0, 3
+  %b = add i64 %a, 7
+  ret i64 %b
+}
+
+define i64 @sh1adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh1adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 31
+; RV64I-NEXT:    addi a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 1
+; RV64ZBA-NEXT:    addi a0, a0, 11
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 1
+  %c = add i64 %b, 11
+  ret i64 %c
+}
+
+define i64 @sh2adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh2adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 30
+; RV64I-NEXT:    addi a0, a0, -12
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 2
+; RV64ZBA-NEXT:    addi a0, a0, -12
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 2
+  %c = add i64 %b, -12
+  ret i64 %c
+}
+
+define i64 @sh3adduw_imm(i32 signext %0) {
+; RV64I-LABEL: sh3adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 29
+; RV64I-NEXT:    addi a0, a0, 13
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli.uw a0, a0, 3
+; RV64ZBA-NEXT:    addi a0, a0, 13
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = shl i64 %a, 3
+  %c = add i64 %b, 13
+  ret i64 %c
+}
+
+define i64 @adduw_imm(i32 signext %0) nounwind {
+; RV64I-LABEL: adduw_imm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 5
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: adduw_imm:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    addi a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %a = zext i32 %0 to i64
+  %b = add i64 %a, 5
+  ret i64 %b
+}
+
+define i64 @mul258(i64 %a) {
+; CHECK-LABEL: mul258:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 258
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 258
+  ret i64 %c
+}
+
+define i64 @mul260(i64 %a) {
+; CHECK-LABEL: mul260:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 260
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 260
+  ret i64 %c
+}
+
+define i64 @mul264(i64 %a) {
+; CHECK-LABEL: mul264:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 264
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 264
+  ret i64 %c
+}
+
+define i64 @imm_zextw() nounwind {
+; RV64I-LABEL: imm_zextw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, -2
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: imm_zextw:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a0, -2
+; RV64ZBA-NEXT:    zext.w a0, a0
+; RV64ZBA-NEXT:    ret
+  ret i64 4294967294 ; -2 in 32 bits.
+}
+
+define i64 @mul11(i64 %a) {
+; RV64I-LABEL: mul11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 11
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul11:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 11
+  ret i64 %c
+}
+
+define i64 @mul19(i64 %a) {
+; RV64I-LABEL: mul19:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 19
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul19:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 19
+  ret i64 %c
+}
+
+define i64 @mul13(i64 %a) {
+; RV64I-LABEL: mul13:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 13
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul13:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 13
+  ret i64 %c
+}
+
+define i64 @mul21(i64 %a) {
+; RV64I-LABEL: mul21:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 21
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul21:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 21
+  ret i64 %c
+}
+
+define i64 @mul37(i64 %a) {
+; RV64I-LABEL: mul37:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 37
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul37:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 37
+  ret i64 %c
+}
+
+define i64 @mul25(i64 %a) {
+; RV64I-LABEL: mul25:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 25
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul25:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 25
+  ret i64 %c
+}
+
+define i64 @mul41(i64 %a) {
+; RV64I-LABEL: mul41:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 41
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul41:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 41
+  ret i64 %c
+}
+
+define i64 @mul73(i64 %a) {
+; RV64I-LABEL: mul73:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 73
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul73:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 73
+  ret i64 %c
+}
+
+define i64 @mul27(i64 %a) {
+; RV64I-LABEL: mul27:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 27
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul27:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 27
+  ret i64 %c
+}
+
+define i64 @mul45(i64 %a) {
+; RV64I-LABEL: mul45:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 45
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul45:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 45
+  ret i64 %c
+}
+
+define i64 @mul81(i64 %a) {
+; RV64I-LABEL: mul81:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 81
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul81:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 81
+  ret i64 %c
+}
+
+define i64 @mul4098(i64 %a) {
+; RV64I-LABEL: mul4098:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4098:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4098
+  ret i64 %c
+}
+
+define i64 @mul4100(i64 %a) {
+; RV64I-LABEL: mul4100:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4100:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4100
+  ret i64 %c
+}
+
+define i64 @mul4104(i64 %a) {
+; RV64I-LABEL: mul4104:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 3
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul4104:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 12
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
+  %c = mul i64 %a, 4104
+  ret i64 %c
+}
+
+define signext i32 @mulw192(i32 signext %a) {
+; CHECK-LABEL: mulw192:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 192
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 192
+  ret i32 %c
+}
+
+define signext i32 @mulw320(i32 signext %a) {
+; CHECK-LABEL: mulw320:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 320
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 320
+  ret i32 %c
+}
+
+define signext i32 @mulw576(i32 signext %a) {
+; CHECK-LABEL: mulw576:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 576
+; CHECK-NEXT:    mulw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i32 %a, 576
+  ret i32 %c
+}
+
+define i64 @add4104(i64 %a) {
+; RV64I-LABEL: add4104:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: add4104:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a1, 1026
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = add i64 %a, 4104
+  ret i64 %c
+}
+
+define i64 @add8208(i64 %a) {
+; RV64I-LABEL: add8208:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 2
+; RV64I-NEXT:    addiw a1, a1, 16
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: add8208:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a1, 1026
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+  %c = add i64 %a, 8208
+  ret i64 %c
+}
+
+; Make sure we prefer LUI for the 8192 instead of using sh3add.
+define signext i32 @add8192_i32(i32 signext %a) {
+; CHECK-LABEL: add8192_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 2
+; CHECK-NEXT:    addw a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = add i32 %a, 8192
+  ret i32 %c
+}
+
+; Make sure we prefer LUI for the 8192 instead of using sh3add.
+define i64 @add8192(i64 %a) {
+; CHECK-LABEL: add8192:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = add i64 %a, 8192
+  ret i64 %c
+}
+
+define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 6
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 1
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 6
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_6(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 6
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_6:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 6
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 7
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_7:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 2
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 7
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_7(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 7
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_7:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 7
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: addshl32_5_8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl32_5_8:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 3
+; RV64ZBA-NEXT:    add a0, a1, a0
+; RV64ZBA-NEXT:    slliw a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i32 %a, 5
+  %d = shl i32 %b, 8
+  %e = add i32 %c, %d
+  ret i32 %e
+}
+
+define i64 @addshl64_5_8(i64 %a, i64 %b) {
+; RV64I-LABEL: addshl64_5_8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addshl64_5_8:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    slli a0, a0, 5
+; RV64ZBA-NEXT:    ret
+  %c = shl i64 %a, 5
+  %d = shl i64 %b, 8
+  %e = add i64 %c, %d
+  ret i64 %e
+}
+
+; Make sure we use sext.h+slli+srli for Zba+Zbb.
+; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
+define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
+; RV64I-LABEL: sext_ashr_zext_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 31
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
+; RV64ZBANOZBB:       # %bb.0:
+; RV64ZBANOZBB-NEXT:    slli a0, a0, 24
+; RV64ZBANOZBB-NEXT:    sraiw a0, a0, 31
+; RV64ZBANOZBB-NEXT:    zext.w a0, a0
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
+; RV64ZBAZBB:       # %bb.0:
+; RV64ZBAZBB-NEXT:    sext.b a0, a0
+; RV64ZBAZBB-NEXT:    sraiw a0, a0, 9
+; RV64ZBAZBB-NEXT:    zext.w a0, a0
+; RV64ZBAZBB-NEXT:    ret
+  %ext = sext i8 %a to i32
+  %1 = ashr i32 %ext, 9
+  ret i32 %1
+}
+
+; Make sure we use sext.h+slli+srli for Zba+Zbb.
+; FIXME: The RV64I and Zba only cases can be done with only 3 shifts.
+define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
+; RV64I-LABEL: sext_ashr_zext_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 25
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
+; RV64ZBANOZBB:       # %bb.0:
+; RV64ZBANOZBB-NEXT:    slli a0, a0, 16
+; RV64ZBANOZBB-NEXT:    sraiw a0, a0, 25
+; RV64ZBANOZBB-NEXT:    zext.w a0, a0
+; RV64ZBANOZBB-NEXT:    ret
+;
+; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
+; RV64ZBAZBB:       # %bb.0:
+; RV64ZBAZBB-NEXT:    slli a0, a0, 48
+; RV64ZBAZBB-NEXT:    srai a0, a0, 57
+; RV64ZBAZBB-NEXT:    zext.w a0, a0
+; RV64ZBAZBB-NEXT:    ret
+  %ext = sext i16 %a to i32
+  %1 = ashr i32 %ext, 9
+  ret i32 %1
+}
+
+; This is the IR you get from InstCombine if you take the difference of 2 pointers
+; and cast it to unsigned before using it as an index.
+define signext i16 @sh1adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh1adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 33
+; RV64I-NEXT:    addi a2, a2, -2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh1adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 1
+; RV64ZBA-NEXT:    sh1add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 1
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i16, ptr %baseptr, i64 %cast
+  %res = load i16, ptr %ptr
+  ret i16 %res
+}
+
+define signext i32 @sh2adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh2adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 34
+; RV64I-NEXT:    addi a2, a2, -4
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh2adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 2
+; RV64ZBA-NEXT:    sh2add.uw a0, a0, a1
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 2
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i32, ptr %baseptr, i64 %cast
+  %res = load i32, ptr %ptr
+  ret i32 %res
+}
+
+define i64 @sh3adduw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: sh3adduw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 35
+; RV64I-NEXT:    addi a2, a2, -8
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: sh3adduw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 3
+; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 3
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i64, ptr %baseptr, i64 %cast
+  %res = load i64, ptr %ptr
+  ret i64 %res
+}
+
+define signext i16 @srliw_1_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+define i128 @slliuw_ptrdiff(i64 %diff, ptr %baseptr) {
+; RV64I-LABEL: slliuw_ptrdiff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    slli a2, a2, 36
+; RV64I-NEXT:    addi a2, a2, -16
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    add a1, a1, a0
+; RV64I-NEXT:    ld a0, 0(a1)
+; RV64I-NEXT:    ld a1, 8(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slliuw_ptrdiff:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a0, a0, 4
+; RV64ZBA-NEXT:    slli.uw a0, a0, 4
+; RV64ZBA-NEXT:    add a1, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a1)
+; RV64ZBA-NEXT:    ld a1, 8(a1)
+; RV64ZBA-NEXT:    ret
+  %ptrdiff = lshr exact i64 %diff, 4
+  %cast = and i64 %ptrdiff, 4294967295
+  %ptr = getelementptr inbounds i128, ptr %baseptr, i64 %cast
+  %res = load i128, ptr %ptr
+  ret i128 %res
+}
+
+define signext i32 @srliw_2_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_3_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 3
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_3_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 3
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 3
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i32 @srliw_1_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_1_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define i64 @srliw_2_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i16 @srliw_2_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+
+define signext i32 @srliw_3_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_3_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 3
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_3_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 3
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 3
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @srliw_4_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: srliw_4_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 4
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_4_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 4
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 4
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
+
+define signext i32 @srli_1_sh2add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 1
+  %4 = getelementptr inbounds i32, ptr %0, i64 %3
+  %5 = load i32, ptr %4, align 4
+  ret i32 %5
+}
+
+define i64 @srli_2_sh3add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 2
+  %4 = getelementptr inbounds i64, ptr %0, i64 %3
+  %5 = load i64, ptr %4, align 8
+  ret i64 %5
+}
+
+define signext i16 @srli_2_sh1add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 2
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 2
+  %4 = getelementptr inbounds i16, ptr %0, i64 %3
+  %5 = load i16, ptr %4, align 2
+  ret i16 %5
+}
+
+define signext i32 @srli_3_sh2add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_3_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_3_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 3
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 3
+  %4 = getelementptr inbounds i32, ptr %0, i64 %3
+  %5 = load i32, ptr %4, align 4
+  ret i32 %5
+}
+
+define i64 @srli_4_sh3add(ptr %0, i64 %1) {
+; RV64I-LABEL: srli_4_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 1
+; RV64I-NEXT:    andi a1, a1, -8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srli_4_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srli a1, a1, 4
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i64 %1, 4
+  %4 = getelementptr inbounds i64, ptr %0, i64 %3
+  %5 = load i64, ptr %4, align 8
+  ret i64 %5
+}
+
+define signext i16 @shl_2_sh1add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_2_sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_2_sh1add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 2
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lh a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i16, ptr %0, i64 %4
+  %6 = load i16, ptr %5, align 2
+  ret i16 %6
+}
+
+define signext i32 @shl_16_sh2add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_16_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_16_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 16
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 16
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, ptr %0, i64 %4
+  %6 = load i32, ptr %5, align 4
+  ret i32 %6
+}
+
+define i64 @shl_31_sh3add(ptr %0, i32 signext %1) {
+; RV64I-LABEL: shl_31_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_31_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a1, 31
+; RV64ZBA-NEXT:    zext.w a1, a1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = shl i32 %1, 31
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, ptr %0, i64 %4
+  %6 = load i64, ptr %5, align 8
+  ret i64 %6
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
new file mode 100644
index 000000000000000..1ab37493b0ec61e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-intrinsic.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBB
+
+declare i32 @llvm.riscv.orc.b.i32(i32)
+
+define signext i32 @orcb32(i32 signext %a) nounwind {
+; RV64ZBB-LABEL: orcb32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
+  ret i32 %tmp
+}
+
+define zeroext i32 @orcb32_zext(i32 zeroext %a) nounwind {
+; RV64ZBB-LABEL: orcb32_zext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
+  ret i32 %tmp
+}
+
+; The second and+or is redundant with the first; make sure we remove them.
+define signext i32 @orcb32_knownbits(i32 signext %a) nounwind {
+; RV64ZBB-LABEL: orcb32_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 1044480
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 2048
+; RV64ZBB-NEXT:    addi a1, a1, 1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = and i32 %a, 4278190080 ; 0xFF000000
+  %tmp2 = or i32 %tmp, 8388609 ; 0x800001
+  %tmp3 = call i32 @llvm.riscv.orc.b.i32(i32 %tmp2)
+  %tmp4 = and i32 %tmp3, 4278190080 ; 0xFF000000
+  %tmp5 = or i32 %tmp4, 16711935 ; 0xFF00FF
+  ret i32 %tmp5
+}
+
+declare i64 @llvm.riscv.orc.b.i64(i64)
+
+define i64 @orcb64(i64 %a) nounwind {
+; RV64ZBB-LABEL: orcb64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.orc.b.i64(i64 %a)
+  ret i64 %tmp
+}
+
+; The second and+or is redundant with the first; make sure we remove them.
+define i64 @orcb64_knownbits(i64 %a) nounwind {
+; RV64ZBB-LABEL: orcb64_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 65535
+; RV64ZBB-NEXT:    slli a1, a1, 12
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 256
+; RV64ZBB-NEXT:    addiw a1, a1, 8
+; RV64ZBB-NEXT:    slli a2, a1, 42
+; RV64ZBB-NEXT:    add a1, a1, a2
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = and i64 %a, 1099494850560 ; 0x000000ffff000000
+  %tmp2 = or i64 %tmp, 4611721202800525320 ; 0x4000200000100008
+  %tmp3 = call i64 @llvm.riscv.orc.b.i64(i64 %tmp2)
+  %tmp4 = and i64 %tmp3, 1099494850560 ; 0x000000ffff000000
+  %tmp5 = or i64 %tmp4, 18374966855153418495 ; 0xff00ff0000ff00ff
+  ret i64 %tmp5
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
new file mode 100644
index 000000000000000..e6e9829c16f22b8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb-zbkb.ll
@@ -0,0 +1,600 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBB-ZBKB,RV64ZBKB
+
+define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
+define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: orn_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: orn_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %or = or i32 %neg, %a
+  ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: orn_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: orn_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %or = or i64 %neg, %a
+  ret i64 %or
+}
+
+define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: xnor_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: xnor_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %xor = xor i32 %neg, %b
+  ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: xnor_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: xnor_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %neg, %b
+  ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: rol_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a2, a1, -1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    negw a2, a2
+; RV64I-NEXT:    srlw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+; Similar to rol_i32, but doesn't sign extend the result.
+define void @rol_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
+; RV64I-LABEL: rol_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a1, -1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    negw a3, a3
+; RV64I-NEXT:    srlw a0, a0, a3
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sw a0, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
+; RV64I-LABEL: rol_i32_neg_constant_rhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a1, a0, -1
+; RV64I-NEXT:    li a2, -2
+; RV64I-NEXT:    sllw a0, a2, a0
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    srlw a1, a2, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i32_neg_constant_rhs:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 -2, i32 -2, i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: rol_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sll a2, a0, a1
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rol_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rol a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: ror_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a2, a1, -1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    negw a2, a2
+; RV64I-NEXT:    sllw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %1
+}
+
+; Similar to ror_i32, but doesn't sign extend the result.
+define void @ror_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
+; RV64I-LABEL: ror_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a3, a1, -1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    negw a3, a3
+; RV64I-NEXT:    sllw a0, a0, a3
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sw a0, 0(a2)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
+; RV64I-LABEL: ror_i32_neg_constant_rhs:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a1, a0, -1
+; RV64I-NEXT:    li a2, -2
+; RV64I-NEXT:    srlw a0, a2, a0
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    sllw a1, a2, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i32_neg_constant_rhs:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rorw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 -2, i32 -2, i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: ror_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a2, a0, a1
+; RV64I-NEXT:    negw a1, a1
+; RV64I-NEXT:    sll a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: ror_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    ror a0, a0, a1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32_fshl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    slliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshl:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+; Similar to rori_i32_fshl, but doesn't sign extend the result.
+define void @rori_i32_fshl_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: rori_i32_fshl_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshl_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  store i32 %1, ptr %x
+  ret void
+}
+
+define signext i32 @rori_i32_fshr(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32_fshr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slliw a1, a0, 1
+; RV64I-NEXT:    srliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshr:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+; Similar to rori_i32_fshr, but doesn't sign extend the result.
+define void @rori_i32_fshr_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: rori_i32_fshr_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    srliw a0, a0, 31
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i32_fshr_nosext:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  store i32 %1, ptr %x
+  ret void
+}
+
+; This test is similar to the type legalized version of the fshl/fshr tests, but
+; instead of having the same input to both shifts it has different inputs. Make
+; sure we don't match it as a roriw.
+define signext i32 @not_rori_i32(i32 signext %x, i32 signext %y) nounwind {
+; CHECK-LABEL: not_rori_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slliw a0, a0, 31
+; CHECK-NEXT:    srliw a1, a1, 1
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    ret
+  %a = shl i32 %x, 31
+  %b = lshr i32 %y, 1
+  %c = or i32 %a, %b
+  ret i32 %c
+}
+
+; This is similar to the type legalized roriw pattern, but the and mask is more
+; than 32 bits so the lshr doesn't shift zeroes into the lower 32 bits. Make
+; sure we don't match it to roriw.
+define i64 @roriw_bug(i64 %x) nounwind {
+; CHECK-LABEL: roriw_bug:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a0, 31
+; CHECK-NEXT:    andi a2, a0, -2
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    xor a0, a2, a0
+; CHECK-NEXT:    ret
+  %a = shl i64 %x, 31
+  %b = and i64 %x, 18446744073709551614
+  %c = lshr i64 %b, 1
+  %d = or i64 %a, %c
+  %e = shl i64 %d, 32
+  %f = ashr i64 %e, 32
+  %g = xor i64 %b, %f ; to increase the use count on %b to disable SimplifyDemandedBits.
+  ret i64 %g
+}
+
+define i64 @rori_i64_fshl(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64_fshl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i64_fshl:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rori a0, a0, 1
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define i64 @rori_i64_fshr(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64_fshr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: rori_i64_fshr:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    rori a0, a0, 63
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define signext i32 @not_shl_one_i32(i32 signext %x) {
+; RV64I-LABEL: not_shl_one_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sllw a0, a1, a0
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: not_shl_one_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rolw a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = shl i32 1, %x
+  %2 = xor i32 %1, -1
+  ret i32 %2
+}
+
+define i64 @not_shl_one_i64(i64 %x) {
+; RV64I-LABEL: not_shl_one_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sll a0, a1, a0
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: not_shl_one_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBKB-NEXT:    rol a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %1 = shl i64 1, %x
+  %2 = xor i64 %1, -1
+  ret i64 %2
+}
+
+define i8 @srli_i8(i8 %a) nounwind {
+; CHECK-LABEL: srli_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 192
+; CHECK-NEXT:    srliw a0, a0, 6
+; CHECK-NEXT:    ret
+  %1 = lshr i8 %a, 6
+  ret i8 %1
+}
+
+; We could use sext.b+srai, but slli+srai offers more opportunities for
+; compressed instructions.
+define i8 @srai_i8(i8 %a) nounwind {
+; RV64I-LABEL: srai_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    sraiw a0, a0, 29
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srai_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srai a0, a0, 61
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srai_i8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 24
+; RV64ZBKB-NEXT:    sraiw a0, a0, 29
+; RV64ZBKB-NEXT:    ret
+  %1 = ashr i8 %a, 5
+  ret i8 %1
+}
+
+; We could use zext.h+srli, but slli+srli offers more opportunities for
+; compressed instructions.
+define i16 @srli_i16(i16 %a) nounwind {
+; RV64I-LABEL: srli_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    srliw a0, a0, 6
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srli_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    srliw a0, a0, 6
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srli_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    srliw a0, a0, 6
+; RV64ZBKB-NEXT:    ret
+  %1 = lshr i16 %a, 6
+  ret i16 %1
+}
+
+; We could use sext.h+srai, but slli+srai offers more opportunities for
+; compressed instructions.
+define i16 @srai_i16(i16 %a) nounwind {
+; RV64I-LABEL: srai_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 16
+; RV64I-NEXT:    sraiw a0, a0, 25
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: srai_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    slli a0, a0, 48
+; RV64ZBB-NEXT:    srai a0, a0, 57
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: srai_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    slli a0, a0, 16
+; RV64ZBKB-NEXT:    sraiw a0, a0, 25
+; RV64ZBKB-NEXT:    ret
+  %1 = ashr i16 %a, 9
+  ret i16 %1
+}
+
+define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_seqz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_seqz_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp eq i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_seqz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_seqz_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp eq i64 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_snez_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_snez_i32:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp ne i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_snez_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-ZBKB-LABEL: andn_snez_i64:
+; RV64ZBB-ZBKB:       # %bb.0:
+; RV64ZBB-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp ne i64 %and, %b
+  ret i1 %cmpeq
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
new file mode 100644
index 000000000000000..acc175186b85863
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
@@ -0,0 +1,1068 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    j .LBB1_3
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:  .LBB1_3: # %cond.end
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: log2_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    li a1, 31
+; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  %2 = sub i32 31, %1
+  ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    li s0, 32
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    beqz a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a1, a0, 24
+; RV64I-NEXT:  .LBB2_2: # %cond.end
+; RV64I-NEXT:    subw a0, s0, a1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: log2_ceil_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    li a1, 32
+; RV64ZBB-NEXT:    subw a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %1 = sub i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  %3 = sub i32 32, %2
+  ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    or a0, s0, a0
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    xori a0, a0, 31
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: findLastSet_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a1, a0
+; RV64ZBB-NEXT:    xori a1, a1, 31
+; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    addiw a0, a0, -1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+  %2 = xor i32 31, %1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 -1, i32 %2
+  ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_lshr_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srliw a0, a0, 1
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = lshr i32 %a, 1
+  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+  ret i32 %2
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 16
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 32
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctlz_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clz a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    li a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    negw a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_zero_undef_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    snez a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: findFirstSet_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a1, a0
+; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    addiw a0, a0, -1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = icmp eq i32 %a, 0
+  %3 = select i1 %2, i32 -1, i32 %1
+  ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    negw a0, a0
+; RV64I-NEXT:    and a0, s0, a0
+; RV64I-NEXT:    lui a1, 30667
+; RV64I-NEXT:    addiw a1, a1, 1329
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 27
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    seqz a1, s0
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ffs_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a1, a0
+; RV64ZBB-NEXT:    addi a1, a1, 1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %2 = add i32 %1, 1
+  %3 = icmp eq i32 %a, 0
+  %4 = select i1 %3, i32 0, i32 %2
+  ret i32 %4
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB10_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srli a0, a0, 58
+; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
+; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lbu a0, 0(a0)
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: cttz_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %1
+}
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define signext i32 @ctpop_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctpop_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %1
+}
+
+define signext i32 @ctpop_i32_load(ptr %p) nounwind {
+; RV64I-LABEL: ctpop_i32_load:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srliw a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srliw a0, a0, 24
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i32_load:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lw a0, 0(a0)
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    ret
+  %a = load i32, ptr %p
+  %1 = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctpop_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctpop_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    call __muldi3 at plt
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sextb_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sextb_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.b a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sexth_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: sexth_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
+define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: min_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB18_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB18_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: min_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp slt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @min_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: min_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB19_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: min_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp slt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: max_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB20_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB20_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: max_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @max_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: max_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB21_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB21_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: max_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: minu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB22_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB22_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: minu_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ult i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @minu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: minu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB23_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB23_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: minu_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ult i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: maxu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB24_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB24_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: maxu_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: maxu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB25_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB25_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: maxu_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %cmp = icmp ugt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+declare i32 @llvm.abs.i32(i32, i1 immarg)
+
+define i32 @abs_i32(i32 %x) {
+; RV64I-LABEL: abs_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    xor a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
+  ret i32 %abs
+}
+
+define signext i32 @abs_i32_sext(i32 signext %x) {
+; RV64I-LABEL: abs_i32_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i32_sext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    xor a0, a0, a1
+; RV64ZBB-NEXT:    subw a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
+  ret i32 %abs
+}
+
+declare i64 @llvm.abs.i64(i64, i1 immarg)
+
+define i64 @abs_i64(i64 %x) {
+; RV64I-LABEL: abs_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: abs_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
+  ret i64 %abs
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV64I-LABEL: zexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: zexth_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %and = and i32 %a, 65535
+  ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: zexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: zexth_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    ret
+  %and = and i64 %a, 65535
+  ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slliw a2, a2, 8
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srai a0, a0, 32
+; RV64ZBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+; Similar to bswap_i32 but the result is not sign extended.
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
+; RV64I-LABEL: bswap_i32_nosext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addi a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    and a3, a0, a3
+; RV64I-NEXT:    slli a3, a3, 8
+; RV64I-NEXT:    slli a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    sw a0, 0(a1)
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i32_nosext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %1, ptr %x
+  ret void
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    srli a5, a0, 8
+; RV64I-NEXT:    srliw a5, a5, 24
+; RV64I-NEXT:    slli a5, a5, 24
+; RV64I-NEXT:    or a3, a5, a3
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    and a4, a0, a4
+; RV64I-NEXT:    slli a4, a4, 24
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a3, a4, a3
+; RV64I-NEXT:    and a2, a0, a2
+; RV64I-NEXT:    slli a2, a2, 40
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: bswap_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
new file mode 100644
index 000000000000000..9b37e8729576ff5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-intrinsic.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC
+
+declare i64 @llvm.riscv.clmulr.i64(i64 %a, i64 %b)
+
+define i64 @clmul64r(i64 %a, i64 %b) nounwind {
+; RV64ZBC-LABEL: clmul64r:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmulr.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32r(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-LABEL: clmul32r:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    slli a1, a1, 32
+; RV64ZBC-NEXT:    slli a0, a0, 32
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
+; RV64ZBC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+; FIXME: We could avoid the slli instructions by using clmul+srli+sext.w since
+; the inputs are zero extended.
+define signext i32 @clmul32r_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-LABEL: clmul32r_zext:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    slli a1, a1, 32
+; RV64ZBC-NEXT:    slli a0, a0, 32
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
+; RV64ZBC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
new file mode 100644
index 000000000000000..e0c9740a9c4bb29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbc-zbkc-intrinsic.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+; RUN: llc -mtriple=riscv64 -mattr=+zbkc -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+
+declare i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+
+define i64 @clmul64(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+
+define i64 @clmul64h(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64h:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    sext.w a0, a0
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+
+define signext i32 @clmul32h(i32 signext %a, i32 signext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32h:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
+; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+; FIXME: We could avoid the slli instructions by using clmul+srai since the
+; inputs are zero extended.
+define signext i32 @clmul32h_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32h_zext:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
+; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
new file mode 100644
index 000000000000000..3169f65f646718b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbkb-intrinsic.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBKB
+
+declare i64 @llvm.riscv.brev8.i64(i64)
+
+define i64 @brev8(i64 %a) nounwind {
+; RV64ZBKB-LABEL: brev8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %val = call i64 @llvm.riscv.brev8.i64(i64 %a)
+  ret i64 %val
+}
+
+; Test that brev8 is recognized as preserving zero extension.
+define zeroext i16 @brev8_knownbits(i16 zeroext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_knownbits:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %zext = zext i16 %a to i64
+  %val = call i64 @llvm.riscv.brev8.i64(i64 %zext)
+  %trunc = trunc i64 %val to i16
+  ret i16 %trunc
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @rev8_i64(i64 %a) {
+; RV64ZBKB-LABEL: rev8_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
+
+declare i32 @llvm.riscv.brev8.i32(i32)
+
+define signext i32 @brev8_i32(i32 signext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    sext.w a0, a0
+; RV64ZBKB-NEXT:    ret
+  %val = call i32 @llvm.riscv.brev8.i32(i32 %a)
+  ret i32 %val
+}
+
+; Test that brev8 is recognized as preserving zero extension.
+define zeroext i16 @brev8_i32_knownbits(i16 zeroext %a) nounwind {
+; RV64ZBKB-LABEL: brev8_i32_knownbits:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %zext = zext i16 %a to i32
+  %val = call i32 @llvm.riscv.brev8.i32(i32 %zext)
+  %trunc = trunc i32 %val to i16
+  ret i16 %trunc
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @rev8_i32(i32 signext %a) {
+; RV64ZBKB-LABEL: rev8_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srai a0, a0, 32
+; RV64ZBKB-NEXT:    ret
+  %1 = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
new file mode 100644
index 000000000000000..c4680a5e15120f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbs.ll
@@ -0,0 +1,1000 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbs -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefixes=CHECK,RV64ZBS
+
+define signext i32 @bclr_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define signext i32 @bclr_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define signext i32 @bclr_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: bclr_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %neg = xor i32 %shl, -1
+  %and1 = and i32 %neg, %a
+  ret i32 %and1
+}
+
+define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bclr_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclr_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclr a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %b, 63
+  %shl = shl nuw i64 1, %and
+  %neg = xor i64 %shl, -1
+  %and1 = and i64 %neg, %a
+  ret i64 %and1
+}
+
+define i64 @bclr_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bclr_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    not a1, a1
+; RV64I-NEXT:    and a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclr_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclr a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %b
+  %neg = xor i64 %shl, -1
+  %and1 = and i64 %neg, %a
+  ret i64 %and1
+}
+
+define signext i32 @bset_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+define signext i32 @bset_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+define signext i32 @bset_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: bset_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %or = or i32 %shl, %a
+  ret i32 %or
+}
+
+; We can use bset for 1 << x by setting the first source to zero.
+define signext i32 @bset_i32_zero(i32 signext %a) nounwind {
+; CHECK-LABEL: bset_i32_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    sllw a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %a
+  ret i32 %shl
+}
+
+define i64 @bset_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bset_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shl = shl nuw i64 1, %conv
+  %or = or i64 %shl, %a
+  ret i64 %or
+}
+
+define i64 @bset_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bset_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %b
+  %or = or i64 %shl, %a
+  ret i64 %or
+}
+
+; We can use bset for 1 << x by setting the first source to zero.
+define signext i64 @bset_i64_zero(i64 signext %a) nounwind {
+; RV64I-LABEL: bset_i64_zero:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    sll a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bset_i64_zero:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bset a0, zero, a0
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %a
+  ret i64 %shl
+}
+
+define signext i32 @binv_i32(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %and = and i32 %b, 31
+  %shl = shl nuw i32 1, %and
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define signext i32 @binv_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32_no_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %shl = shl i32 1, %b
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define signext i32 @binv_i32_load(ptr %p, i32 signext %b) nounwind {
+; CHECK-LABEL: binv_i32_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    sllw a1, a2, a1
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+  %a = load i32, ptr %p
+  %shl = shl i32 1, %b
+  %xor = xor i32 %shl, %a
+  ret i32 %xor
+}
+
+define i64 @binv_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: binv_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    xor a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binv_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binv a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shl = shl nuw i64 1, %conv
+  %xor = xor i64 %shl, %a
+  ret i64 %xor
+}
+
+define i64 @binv_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: binv_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    sll a1, a2, a1
+; RV64I-NEXT:    xor a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binv_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binv a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shl = shl nuw i64 1, %b
+  %xor = xor i64 %shl, %a
+  ret i64 %xor
+}
+
+define signext i32 @bext_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: bext_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i32:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    andi a1, a1, 31
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %b, 31
+  %shr = lshr i32 %a, %and
+  %and1 = and i32 %shr, 1
+  ret i32 %and1
+}
+
+define signext i32 @bext_i32_no_mask(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: bext_i32_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i32_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i32 %a, %b
+  %and1 = and i32 %shr, 1
+  ret i32 %and1
+}
+
+; This previously gets converted to (i1 (truncate (srl X, Y))). Make sure we are
+; able to use bext.
+define void @bext_i32_trunc(i32 signext %0, i32 signext %1) {
+; RV64I-LABEL: bext_i32_trunc:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srlw a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB19_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    tail bar at plt
+;
+; RV64ZBS-LABEL: bext_i32_trunc:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    beqz a0, .LBB19_2
+; RV64ZBS-NEXT:  # %bb.1:
+; RV64ZBS-NEXT:    ret
+; RV64ZBS-NEXT:  .LBB19_2:
+; RV64ZBS-NEXT:    tail bar at plt
+  %3 = shl i32 1, %1
+  %4 = and i32 %3, %0
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %6, label %7
+
+6:                                                ; preds = %2
+  tail call void @bar()
+  br label %7
+
+7:                                                ; preds = %6, %2
+  ret void
+}
+
+declare void @bar()
+
+define i64 @bext_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bext_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %conv = and i64 %b, 63
+  %shr = lshr i64 %a, %conv
+  %and1 = and i64 %shr, 1
+  ret i64 %and1
+}
+
+define i64 @bext_i64_no_mask(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: bext_i64_no_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bext_i64_no_mask:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bext a0, a0, a1
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i64 %a, %b
+  %and1 = and i64 %shr, 1
+  ret i64 %and1
+}
+
+define signext i32 @bexti_i32(i32 signext %a) nounwind {
+; CHECK-LABEL: bexti_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a0, a0, 5
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %shr = lshr i32 %a, 5
+  %and = and i32 %shr, 1
+  ret i32 %and
+}
+
+define i64 @bexti_i64(i64 %a) nounwind {
+; RV64I-LABEL: bexti_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %shr = lshr i64 %a, 5
+  %and = and i64 %shr, 1
+  ret i64 %and
+}
+
+define signext i32 @bexti_i32_cmp(i32 signext %a) nounwind {
+; RV64I-LABEL: bexti_i32_cmp:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i32_cmp:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, 32
+  %cmp = icmp ne i32 %and, 0
+  %zext = zext i1 %cmp to i32
+  ret i32 %zext
+}
+
+define i64 @bexti_i64_cmp(i64 %a) nounwind {
+; RV64I-LABEL: bexti_i64_cmp:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 58
+; RV64I-NEXT:    srli a0, a0, 63
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bexti_i64_cmp:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bexti a0, a0, 5
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, 32
+  %cmp = icmp ne i64 %and, 0
+  %zext = zext i1 %cmp to i64
+  ret i64 %zext
+}
+
+define signext i32 @bclri_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: bclri_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, -1025
+; CHECK-NEXT:    ret
+  %and = and i32 %a, -1025
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: bclri_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048575
+; RV64I-NEXT:    addiw a1, a1, 2047
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, -2049
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: bclri_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 786432
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %and = and i32 %a, -1073741825
+  ret i32 %and
+}
+
+define signext i32 @bclri_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: bclri_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 33
+; CHECK-NEXT:    srli a0, a0, 33
+; CHECK-NEXT:    ret
+  %and = and i32 %a, -2147483649
+  ret i32 %and
+}
+
+define i64 @bclri_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: bclri_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, -1025
+; CHECK-NEXT:    ret
+  %and = and i64 %a, -1025
+  ret i64 %and
+}
+
+define i64 @bclri_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048575
+; RV64I-NEXT:    addiw a1, a1, 2047
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -2049
+  ret i64 %and
+}
+
+define i64 @bclri_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 786432
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -1073741825
+  ret i64 %and
+}
+
+define i64 @bclri_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 524288
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -2147483649
+  ret i64 %and
+}
+
+define i64 @bclri_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -4611686018427387905
+  ret i64 %and
+}
+
+define i64 @bclri_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -9223372036854775809
+  ret i64 %and
+}
+
+define i64 @bclri_i64_large0(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_large0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1044480
+; RV64I-NEXT:    addiw a1, a1, -256
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_large0:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    andi a0, a0, -256
+; RV64ZBS-NEXT:    bclri a0, a0, 24
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -16777472
+  ret i64 %and
+}
+
+define i64 @bclri_i64_large1(i64 %a) nounwind {
+; RV64I-LABEL: bclri_i64_large1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1044464
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bclri_i64_large1:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bclri a0, a0, 16
+; RV64ZBS-NEXT:    bclri a0, a0, 24
+; RV64ZBS-NEXT:    ret
+  %and = and i64 %a, -16842753
+  ret i64 %and
+}
+
+define signext i32 @bseti_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: bseti_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %or = or i32 %a, 1024
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: bseti_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %or = or i32 %a, 2048
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: bseti_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %or = or i32 %a, 1073741824
+  ret i32 %or
+}
+
+define signext i32 @bseti_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: bseti_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    ret
+  %or = or i32 %a, 2147483648
+  ret i32 %or
+}
+
+define i64 @bseti_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: bseti_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %or = or i64 %a, 1024
+  ret i64 %or
+}
+
+define i64 @bseti_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 2048
+  ret i64 %or
+}
+
+define i64 @bseti_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 1073741824
+  ret i64 %or
+}
+
+define i64 @bseti_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 2147483648
+  ret i64 %or
+}
+
+define i64 @bseti_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4611686018427387904
+  ret i64 %or
+}
+
+define i64 @bseti_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: bseti_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bseti_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 9223372036854775808
+  ret i64 %or
+}
+
+define signext i32 @binvi_i32_10(i32 signext %a) nounwind {
+; CHECK-LABEL: binvi_i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %xor = xor i32 %a, 1024
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_11(i32 signext %a) nounwind {
+; RV64I-LABEL: binvi_i32_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i32_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %xor = xor i32 %a, 2048
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_30(i32 signext %a) nounwind {
+; RV64I-LABEL: binvi_i32_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i32_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %xor = xor i32 %a, 1073741824
+  ret i32 %xor
+}
+
+define signext i32 @binvi_i32_31(i32 signext %a) nounwind {
+; CHECK-LABEL: binvi_i32_31:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    ret
+  %xor = xor i32 %a, 2147483648
+  ret i32 %xor
+}
+
+define i64 @binvi_i64_10(i64 %a) nounwind {
+; CHECK-LABEL: binvi_i64_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 1024
+; CHECK-NEXT:    ret
+  %xor = xor i64 %a, 1024
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_11(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_11:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 11
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_11:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 11
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 2048
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_30(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 262144
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_30:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 30
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 1073741824
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_31(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_31:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 31
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_31:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 31
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 2147483648
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_62(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 62
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_62:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 62
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4611686018427387904
+  ret i64 %xor
+}
+
+define i64 @binvi_i64_63(i64 %a) nounwind {
+; RV64I-LABEL: binvi_i64_63:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, -1
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: binvi_i64_63:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 63
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 9223372036854775808
+  ret i64 %xor
+}
+
+define i64 @xor_i64_large(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_large:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    addi a1, a1, 1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_large:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    binvi a0, a0, 0
+; RV64ZBS-NEXT:    binvi a0, a0, 32
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4294967297
+  ret i64 %xor
+}
+
+define i64 @xor_i64_4099(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_4099:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 3
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_4099:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    xori a0, a0, 3
+; RV64ZBS-NEXT:    binvi a0, a0, 12
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 4099
+  ret i64 %xor
+}
+
+define i64 @xor_i64_96(i64 %a) nounwind {
+; CHECK-LABEL: xor_i64_96:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xori a0, a0, 96
+; CHECK-NEXT:    ret
+  %xor = xor i64 %a, 96
+  ret i64 %xor
+}
+
+define i64 @or_i64_large(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_large:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    addi a1, a1, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_large:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    bseti a0, a0, 0
+; RV64ZBS-NEXT:    bseti a0, a0, 32
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4294967297
+  ret i64 %or
+}
+
+define i64 @xor_i64_66901(i64 %a) nounwind {
+; RV64I-LABEL: xor_i64_66901:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: xor_i64_66901:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    xori a0, a0, 1365
+; RV64ZBS-NEXT:    binvi a0, a0, 16
+; RV64ZBS-NEXT:    ret
+  %xor = xor i64 %a, 66901
+  ret i64 %xor
+}
+
+define i64 @or_i64_4099(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_4099:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, 3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_4099:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    ori a0, a0, 3
+; RV64ZBS-NEXT:    bseti a0, a0, 12
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 4099
+  ret i64 %or
+}
+
+define i64 @or_i64_96(i64 %a) nounwind {
+; CHECK-LABEL: or_i64_96:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ori a0, a0, 96
+; CHECK-NEXT:    ret
+  %or = or i64 %a, 96
+  ret i64 %or
+}
+
+define i64 @or_i64_66901(i64 %a) nounwind {
+; RV64I-LABEL: or_i64_66901:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: or_i64_66901:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    ori a0, a0, 1365
+; RV64ZBS-NEXT:    bseti a0, a0, 16
+; RV64ZBS-NEXT:    ret
+  %or = or i64 %a, 66901
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
new file mode 100644
index 000000000000000..774d1398644b984
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -0,0 +1,1308 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck %s -check-prefix=RV64
+
+;
+; Get the actual value of the overflow bit.
+;
+define zeroext i1 @saddo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: saddo1.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a3, a0, a1
+; RV64-NEXT:    slt a0, a3, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    sw a3, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo3.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, -4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: saddo4.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a2, 4096
+; RV64-NEXT:    addi a2, a2, -1
+; RV64-NEXT:    addw a2, a0, a2
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: saddo1.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a3, a0, a1
+; RV64-NEXT:    slt a0, a3, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    sd a3, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: saddo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: saddo3.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, -4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: uaddo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32.constant(i32 signext %v1, ptr %res) {
+; RV64-LABEL: uaddo.i32.constant:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, -2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 -2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32.constant_one(i32 signext %v1, ptr %res) {
+; RV64-LABEL: uaddo.i32.constant_one:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: uaddo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_one(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_one:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 1)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: ssubo1.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a3, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: ssubo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: ssubo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a3, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: usubo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i32.constant.rhs(i32 signext %v1, ptr %res) {
+; RV64-LABEL: usubo.i32.constant.rhs:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addiw a2, a0, 2
+; RV64-NEXT:    sltu a0, a0, a2
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 -2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) {
+; RV64-LABEL: usubo.i32.constant.lhs:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, -2
+; RV64-NEXT:    subw a2, a2, a0
+; RV64-NEXT:    addi a0, a2, 1
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 -2, i32 %v1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: usubo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: smulo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 32
+; RV64-NEXT:    sraiw a3, a1, 31
+; RV64-NEXT:    xor a0, a0, a3
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: smulo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    mul a2, a0, a2
+; RV64-NEXT:    srai a0, a2, 32
+; RV64-NEXT:    sraiw a3, a2, 31
+; RV64-NEXT:    xor a0, a0, a3
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 13)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: smulo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a3, a0, a1
+; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 63
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sd a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: smulo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    mulh a3, a0, a2
+; RV64-NEXT:    mul a2, a0, a2
+; RV64-NEXT:    srai a0, a2, 63
+; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
+; RV64-LABEL: umulo.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a1, a0, a1
+; RV64-NEXT:    srai a0, a1, 32
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo2.i32(i32 signext %v1, ptr %res) {
+; RV64-LABEL: umulo2.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 13
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a2, a0, a2
+; RV64-NEXT:    srli a0, a2, 32
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 13)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, ptr %res
+  ret i1 %obit
+}
+
+; Similar to umulo.i32, but storing the overflow and returning the result.
+define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
+; RV64-LABEL: umulo3.i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    ret
+  %4 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1)
+  %5 = extractvalue { i32, i1 } %4, 1
+  %6 = extractvalue { i32, i1 } %4, 0
+  %7 = zext i1 %5 to i32
+  store i32 %7, ptr %2, align 4
+  ret i32 %6
+}
+
+define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
+; RV64-LABEL: umulo.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a3, a0, a1
+; RV64-NEXT:    snez a3, a3
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
+; RV64-LABEL: umulo2.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a3, 13
+; RV64-NEXT:    mulhu a2, a0, a3
+; RV64-NEXT:    snez a2, a2
+; RV64-NEXT:    mul a0, a0, a3
+; RV64-NEXT:    sd a0, 0(a1)
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+
+;
+; Check the use of the overflow bit in combination with a select instruction.
+;
+define i32 @saddo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a2, a2, a0
+; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    bne a3, a2, .LBB28_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB28_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @saddo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a2, a2, a0
+; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    bne a3, a2, .LBB30_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB30_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @uaddo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: uaddo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    bltu a2, a0, .LBB32_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB32_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @uaddo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: uaddo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    bltu a2, a0, .LBB34_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB34_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @ssubo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    slt a3, a3, a0
+; RV64-NEXT:    bne a2, a3, .LBB36_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB36_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssubo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    slt a3, a3, a0
+; RV64-NEXT:    bne a2, a3, .LBB38_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB38_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @ssub.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssub.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @usubo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    bltu a0, a2, .LBB40_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB40_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @usubo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a2, a0, a1
+; RV64-NEXT:    bltu a0, a2, .LBB42_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB42_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @smulo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a2, a0, a1
+; RV64-NEXT:    srai a3, a2, 32
+; RV64-NEXT:    sraiw a2, a2, 31
+; RV64-NEXT:    bne a3, a2, .LBB44_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB44_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @smulo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    sraiw a0, a0, 31
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a3, a0, a1
+; RV64-NEXT:    srai a3, a3, 63
+; RV64-NEXT:    bne a2, a3, .LBB46_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB46_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i32 @umulo.select.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.select.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    slli a3, a0, 32
+; RV64-NEXT:    mulhu a2, a3, a2
+; RV64-NEXT:    srai a2, a2, 32
+; RV64-NEXT:    bnez a2, .LBB48_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB48_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = select i1 %obit, i32 %v1, i32 %v2
+  ret i32 %ret
+}
+
+define i1 @umulo.not.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.not.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 32
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.select.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a2, a0, a1
+; RV64-NEXT:    bnez a2, .LBB50_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB50_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = select i1 %obit, i64 %v1, i64 %v2
+  ret i64 %ret
+}
+
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.not.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+
+;
+; Check the use of the overflow bit in combination with a branch instruction.
+;
+define zeroext i1 @saddo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: saddo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB52_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: saddo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a2, a0, a1
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    slti a1, a1, 0
+; RV64-NEXT:    beq a1, a0, .LBB53_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB53_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
+; RV64-LABEL: uaddo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB54_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB54_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: uaddo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    bgeu a1, a0, .LBB55_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB55_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: ssubo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    beq a2, a0, .LBB56_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB56_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: ssubo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sgtz a2, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    slt a0, a1, a0
+; RV64-NEXT:    beq a2, a0, .LBB57_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB57_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: usubo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    bgeu a0, a1, .LBB58_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB58_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: usubo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    bgeu a0, a1, .LBB59_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB59_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: smulo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a1, a0, 32
+; RV64-NEXT:    sraiw a0, a0, 31
+; RV64-NEXT:    beq a1, a0, .LBB60_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB60_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: smulo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    beq a2, a0, .LBB61_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB61_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @smulo2.br.i64(i64 %v1) {
+; RV64-LABEL: smulo2.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, -13
+; RV64-NEXT:    mulh a2, a0, a1
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    beq a2, a0, .LBB62_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB62_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 -13)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i32(i32 signext %v1, i32 signext %v2) {
+; RV64-LABEL: umulo.br.i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    srai a0, a0, 32
+; RV64-NEXT:    beqz a0, .LBB63_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB63_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
+; RV64-LABEL: umulo.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    mulhu a0, a0, a1
+; RV64-NEXT:    beqz a0, .LBB64_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB64_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo2.br.i64(i64 %v1) {
+; RV64-LABEL: umulo2.br.i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    add a1, a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB65_2
+; RV64-NEXT:  # %bb.1: # %overflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB65_2: # %continue
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @uaddo.i64.constant(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2048(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_2048:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2047
+; RV64-NEXT:    addi a2, a2, 1
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2048)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i64.constant_2049(i64 %v1, ptr %res) {
+; RV64-LABEL: uaddo.i64.constant_2049:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi a2, a0, 2047
+; RV64-NEXT:    addi a2, a2, 2
+; RV64-NEXT:    sltu a0, a2, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+entry:
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2049)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, ptr %res
+  ret i1 %obit
+}
+
+define i64 @uaddo.i64.constant_setcc_on_overflow_flag(ptr %p) {
+; RV64-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    addi a0, a1, 2
+; RV64-NEXT:    bltu a0, a1, .LBB69_2
+; RV64-NEXT:  # %bb.1: # %IfOverflow
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB69_2: # %IfNoOverflow
+; RV64-NEXT:    ret
+entry:
+  %v1 = load i64, ptr %p
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %IfNoOverflow, label %IfOverflow
+IfOverflow:
+  ret i64 0
+IfNoOverflow:
+  ret i64 %val
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+


