[llvm] 98d4adc - [RISCV] Add custom isel to select (and (srl X, C1), C2) and (and (shl X, C1), C2)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 20 08:56:24 PDT 2021


Author: Craig Topper
Date: 2021-07-20T08:53:55-07:00
New Revision: 98d4adc2d1c12d034616557f3ab55ce88ecf569b

URL: https://github.com/llvm/llvm-project/commit/98d4adc2d1c12d034616557f3ab55ce88ecf569b
DIFF: https://github.com/llvm/llvm-project/commit/98d4adc2d1c12d034616557f3ab55ce88ecf569b.diff

LOG: [RISCV] Add custom isel to select (and (srl X, C1), C2) and (and (shl X, C1), C2)

Replace some existing isel patterns that are covered by the new
code. SLLIUWPat has been removed in favor of folding its root case
into the new code. The other uses in isel patterns for shXadd.uw
have been switched to using hardcoded AND masks.
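
As a sanity check (mine, not part of the commit), the hardcoded masks
are just the old SLLIUWPat condition written out: masking the shifted
value keeps exactly the bits that a zext.w of the source would have
kept before the shift. A minimal standalone C++ sketch:

  // Standalone sketch: checks that the hardcoded shXadd.uw masks are
  // equivalent to zero-extending the low 32 bits before shifting.
  #include <cassert>
  #include <cstdint>

  int main() {
    // Masks from the rewritten patterns in RISCVInstrInfoB.td.
    const uint64_t Masks[] = {0x1FFFFFFFF, 0x3FFFFFFFF, 0x7FFFFFFFF};
    for (uint64_t X : {0x0ull, 0x1ull, 0xFFFFFFFFull, 0xDEADBEEFCAFEF00Dull})
      for (unsigned C2 = 1; C2 <= 3; ++C2)
        assert(((X << C2) & Masks[C2 - 1]) ==   // (and (shl X, C2), mask)
               ((X & 0xFFFFFFFF) << C2));       // zext.w then slli
  }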

This is based on the original version of D49585 from ARM. The final
version of that was made a DAG combine, but I've chosen to keep it
as custom isel. I'm not convinced DAG combine is as good with
shift pairs as it is with and+shift. I saw some issues optimizing
the shifts created by vscale lowering if an AND isn't created
from a shift pair.
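
The two rewrites the new ISD::AND handler performs can be checked in
isolation. A minimal standalone sketch (mine, assuming XLen = 64):

  // Case 1: (and (srl x, c2), c1) == (srli (slli x, c3-c2), c3) when
  // c1 is a mask with c3 leading zeros and c2 < c3.
  // Case 2: (and (shl x, c2), c1) == (srli (slli x, c2+c3), c3) when
  // c1 is a mask shifted left by c2 with c3 leading zeros.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t X = 0xDEADBEEFCAFEF00Dull;
    for (unsigned C3 = 1; C3 < 64; ++C3) {
      uint64_t C1 = ~0ull >> C3;                  // c3 leading zeros
      for (unsigned C2 = 1; C2 < C3; ++C2)
        assert(((X >> C2) & C1) == ((X << (C3 - C2)) >> C3));
    }
    for (unsigned C2 = 1; C2 < 63; ++C2)
      for (unsigned C3 = 1; C2 + C3 < 64; ++C3) {
        uint64_t C1 = (~0ull >> (C2 + C3)) << C2; // shifted mask
        assert(((X << C2) & C1) == ((X << (C2 + C3)) >> C3));
      }
  }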

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D106230

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td
    llvm/test/CodeGen/RISCV/div.ll
    llvm/test/CodeGen/RISCV/rem.ll
    llvm/test/CodeGen/RISCV/rv32zbp.ll
    llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
    llvm/test/CodeGen/RISCV/rv64zbp.ll
    llvm/test/CodeGen/RISCV/srem-lkk.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ca301dcca4da..9866567ac1ee 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -504,6 +504,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
     break;
   }
+  case ISD::AND: {
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!N1C)
+      break;
+
+    SDValue N0 = Node->getOperand(0);
+
+    bool LeftShift = N0.getOpcode() == ISD::SHL;
+    if (!LeftShift && N0.getOpcode() != ISD::SRL)
+      break;
+
+    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!C)
+      break;
+    uint64_t C2 = C->getZExtValue();
+    unsigned XLen = Subtarget->getXLen();
+    if (!C2 || C2 >= XLen)
+      break;
+
+    uint64_t C1 = N1C->getZExtValue();
+
+    // Keep track of whether this is an andi, zext.h, or zext.w.
+    bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
+    if (C1 == UINT64_C(0xFFFF) &&
+        (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
+      ZExtOrANDI = true;
+    if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
+      ZExtOrANDI = true;
+
+    // Clear irrelevant bits in the mask.
+    if (LeftShift)
+      C1 &= maskTrailingZeros<uint64_t>(C2);
+    else
+      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+    // Some transforms should only be done if the shift has a single use or
+    // the AND would become (srli (slli X, 32), 32)
+    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
+
+    SDValue X = N0.getOperand(0);
+
+    // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
+    // with c3 leading zeros.
+    if (!LeftShift && isMask_64(C1)) {
+      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+      if (C2 < C3) {
+        // If the number of leading zeros is C2+32 this can be SRLIW.
+        if (C2 + 32 == C3) {
+          SDNode *SRLIW =
+              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
+                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
+          ReplaceNode(Node, SRLIW);
+          return;
+        }
+
+        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
+        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
+        //
+        // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
+        // legalized and goes through DAG combine.
+        SDValue Y;
+        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+            selectSExti32(X, Y)) {
+          SDNode *SRAIW =
+              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
+                                     CurDAG->getTargetConstant(31, DL, XLenVT));
+          SDNode *SRLIW = CurDAG->getMachineNode(
+              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
+              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+          ReplaceNode(Node, SRLIW);
+          return;
+        }
+
+        // (srli (slli x, c3-c2), c3).
+        if (OneUseOrZExtW && !ZExtOrANDI) {
+          SDNode *SLLI = CurDAG->getMachineNode(
+              RISCV::SLLI, DL, XLenVT, X,
+              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+          SDNode *SRLI =
+              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
+          ReplaceNode(Node, SRLI);
+          return;
+        }
+      }
+    }
+
+    // Turn (and (shl x, c2) c1) -> (srli (slli x, c2+c3), c3) if c1 is a
+    // mask shifted by c2 bits with c3 leading zeros.
+    if (LeftShift && isShiftedMask_64(C1)) {
+      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+
+      if (C2 + C3 < XLen &&
+          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+        // Use slli.uw when possible.
+        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+          SDNode *SLLIUW =
+              CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
+                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
+          ReplaceNode(Node, SLLIUW);
+          return;
+        }
+
+        // (srli (slli x, c2+c3), c3)
+        if (OneUseOrZExtW && !ZExtOrANDI) {
+          SDNode *SLLI = CurDAG->getMachineNode(
+              RISCV::SLLI, DL, XLenVT, X,
+              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+          SDNode *SRLI =
+              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
+          ReplaceNode(Node, SRLI);
+          return;
+        }
+      }
+    }
+
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = Node->getConstantOperandVal(0);
     switch (IntNo) {
@@ -1377,41 +1496,6 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
 
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64).
-// SLLIUW is the same as SLLI except for the fact that it clears the bits
-// XLEN-1:32 of the input RS1 before shifting.
-// A PatFrag has already checked that it has the right structure:
-//
-//  (AND (SHL RS1, VC2), VC1)
-//
-// We check that VC2, the shamt is less than 32, otherwise the pattern is
-// exactly the same as SLLI and we give priority to that.
-// Eventually we check that VC1, the mask used to clear the upper 32 bits
-// of RS1, is correct:
-//
-//  VC1 == (0xFFFFFFFF << VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
-  assert(N->getOpcode() == ISD::AND);
-  assert(N->getOperand(0).getOpcode() == ISD::SHL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  // The IsRV64 predicate is checked after PatFrag predicates so we can get
-  // here even on RV32.
-  if (!Subtarget->is64Bit())
-    return false;
-
-  SDValue Shl = N->getOperand(0);
-  uint64_t VC1 = N->getConstantOperandVal(1);
-  uint64_t VC2 = Shl.getConstantOperandVal(1);
-
-  // Immediate range should be enforced by uimm5 predicate.
-  assert(VC2 < 32 && "Unexpected immediate");
-  return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
-}
-
 // Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index e8b79b008fa4..56d072206316 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -58,8 +58,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool selectSExti32(SDValue N, SDValue &Val);
   bool selectZExti32(SDValue N, SDValue &Val);
 
-  bool MatchSLLIUW(SDNode *N) const;
-
   bool selectVLOp(SDValue N, SDValue &VL);
 
   bool selectVSplat(SDValue N, SDValue &SplatVal);

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 65767ae1e682..949fff25e9e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -885,13 +885,6 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
 }]>;
 def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
 
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64). Also used to optimize the same sequence without SLLIUW.
-def SLLIUWPat : PatFrag<(ops node:$A, node:$B),
-                        (and (shl node:$A, node:$B), imm), [{
-  return MatchSLLIUW(N);
-}]>;
-
 def add_oneuse : PatFrag<(ops node:$A, node:$B), (add node:$A, node:$B), [{
   return N->hasOneUse();
 }]>;
@@ -1236,14 +1229,6 @@ def : Pat<(i64 (and GPR:$rs1, 0xffffffff)), (SRLI (SLLI GPR:$rs1, 32), 32)>;
 // shifts instead of 3. This can occur when unsigned is used to index an array.
 def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
           (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-// shl/and can appear in the other order too.
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
-          (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-
-// If we're shifting a value left by 0-31 bits, and then masking to 32-bits,
-// use 2 shifts instead of 3.
-def : Pat<(i64 (and (shl GPR:$rs1, uimm5:$shamt), 0xffffffff)),
-          (SRLI (SLLI GPR:$rs1, (ImmPlus32 uimm5:$shamt)), 32)>;
 }
 
 let Predicates = [IsRV64] in {

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 278a83194552..7d8d053cc584 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -998,8 +998,6 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
 } // Predicates = [HasStdExtZba]
 
 let Predicates = [HasStdExtZba, IsRV64] in {
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
-          (SLLIUW GPR:$rs1, uimm5:$shamt)>;
 def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
           (SLLIUW GPR:$rs1, uimm5:$shamt)>;
 def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
@@ -1013,11 +1011,11 @@ def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
 def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 1)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
           (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 2)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
           (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 3)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 

diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index dd9e6fa1cc90..991c91cc70ea 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -711,18 +711,16 @@ define i32 @sdiv_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 60
-; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 3
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 60
-; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 3
 ; RV64IM-NEXT:    ret
@@ -749,22 +747,16 @@ define i32 @sdiv_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 47
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -1
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 16
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 47
-; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
-; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 16
 ; RV64IM-NEXT:    ret
@@ -1280,10 +1272,9 @@ define i16 @sdiv16_constant(i16 %a) nounwind {
 ; RV64IM-NEXT:    lui a1, 6
 ; RV64IM-NEXT:    addiw a1, a1, 1639
 ; RV64IM-NEXT:    mul a0, a0, a1
-; RV64IM-NEXT:    srai a1, a0, 17
-; RV64IM-NEXT:    srli a0, a0, 31
-; RV64IM-NEXT:    andi a0, a0, 1
-; RV64IM-NEXT:    add a0, a1, a0
+; RV64IM-NEXT:    srliw a1, a0, 31
+; RV64IM-NEXT:    srai a0, a0, 17
+; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = sdiv i16 %a, 5
   ret i16 %1

diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll
index 8712f532d5ae..cb7098cea611 100644
--- a/llvm/test/CodeGen/RISCV/rem.ll
+++ b/llvm/test/CodeGen/RISCV/rem.ll
@@ -138,9 +138,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 60
-; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -8
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -148,9 +147,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 60
-; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -8
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -182,11 +180,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 47
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -1
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    lui a2, 1048560
 ; RV64I-NEXT:    and a1, a1, a2
@@ -195,11 +190,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 47
-; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
-; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    lui a2, 1048560
 ; RV64IM-NEXT:    and a1, a1, a2

diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 6b32f993251b..6774f0da66ef 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -3348,10 +3348,8 @@ define i32 @packh_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: packh_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    srli a1, a1, 16
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
@@ -3375,10 +3373,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: packh_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a2, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    slli a1, a2, 24
+; RV32I-NEXT:    srli a1, a1, 16
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    mv a1, zero
 ; RV32I-NEXT:    ret

diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index 8c6c5d79de81..df0520aee11e 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -1652,8 +1652,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_aext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    srli a0, a0, 7
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 25
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 7
@@ -1663,8 +1662,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
 define zeroext i32 @zext_sraiw_sext(i32 signext %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_sext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a0, a0, 8
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 8
@@ -1678,8 +1676,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_zext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    srli a0, a0, 9
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 23
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 9

diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 00d3278a3283..171ee6557271 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -3882,10 +3882,8 @@ define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: packh_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
@@ -3909,10 +3907,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: packh_i64:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index a2b7f16f0d01..8efffbabc74c 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -330,9 +330,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_power_of_two:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 57
-; RV64I-NEXT:    andi a1, a1, 63
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 26
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -64
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -340,9 +339,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64IM-LABEL: dont_fold_srem_power_of_two:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 57
-; RV64IM-NEXT:    andi a1, a1, 63
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 26
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -64
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -385,24 +383,20 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_i32_smax:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    addiw a3, a2, -1
-; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 1
 ; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: dont_fold_srem_i32_smax:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    lui a2, 524288
-; RV64IM-NEXT:    addiw a3, a2, -1
-; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 1
 ; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    lui a2, 524288
 ; RV64IM-NEXT:    and a1, a1, a2
 ; RV64IM-NEXT:    addw a0, a0, a1
 ; RV64IM-NEXT:    ret
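
To see why the test diffs above are an improvement, consider the
sdiv_pow2 change: the three-instruction sext.w/srli/andi sequence and
the two-instruction sraiw/srliw sequence both compute 7 when a0 is
negative and 0 otherwise. A standalone sketch (mine) modeling the two
sequences, assuming the usual arithmetic right shift on signed
integers:

  #include <cassert>
  #include <cstdint>

  // Old: sext.w a1, a0 ; srli a1, a1, 60 ; andi a1, a1, 7
  static uint64_t oldSeq(int32_t A) {
    uint64_t S = (uint64_t)(int64_t)A; // sext.w
    return (S >> 60) & 7;              // srli 60, andi 7
  }

  // New: sraiw a1, a0, 31 ; srliw a1, a1, 29
  static uint64_t newSeq(int32_t A) {
    int32_t Sra = A >> 31;             // sraiw 31 (sign broadcast)
    return (uint32_t)Sra >> 29;        // srliw 29
  }

  int main() {
    for (int32_t A : {0, 1, -1, INT32_MIN, INT32_MAX, 12345, -98765})
      assert(oldSeq(A) == newSeq(A));
  }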


        

