[llvm] b645bcd - [RISCV] Generalize (srl (and X, 0xffff), C) -> (srli (slli X, XLen-16), (XLen-16) + C) optimization.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 9 23:38:01 PST 2022


Author: Craig Topper
Date: 2022-01-09T23:37:10-08:00
New Revision: b645bcd98a11c7857cdee51202c64d15b9a4f90d

URL: https://github.com/llvm/llvm-project/commit/b645bcd98a11c7857cdee51202c64d15b9a4f90d
DIFF: https://github.com/llvm/llvm-project/commit/b645bcd98a11c7857cdee51202c64d15b9a4f90d.diff

LOG: [RISCV] Generalize (srl (and X, 0xffff), C) -> (srli (slli X, XLen-16), (XLen-16) + C) optimization.

This can be generalized to (srl (and X, C2), C) ->
(srli (slli X, XLen-C3), (XLen-C3) + C), where C2 is a mask with
C3 trailing ones.

This avoids constant materialization for C2. It is beneficial even
when C2 could be selected to ANDI, because the SLLI can be compressed
to C.SLLI, while C.ANDI only covers a 6-bit sign-extended subset of
ANDI's 12-bit immediates.

This also enables CSE in some cases of i8 sdiv by constant codegen.
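
For reference, the rewrite relies on the identity
  (x & C2) >> C  ==  (x << (XLen-C3)) >> ((XLen-C3) + C)
for logical (unsigned) shifts whenever C2 is a mask of C3 trailing ones and
C < C3. The following standalone C++ sketch (illustrative only, not part of
this patch) spot-checks the identity for XLen = 64:

  #include <cassert>
  #include <cstdint>

  // Mask with C3 trailing ones, e.g. C3 == 16 gives 0xffff.
  static uint64_t maskOfTrailingOnes(unsigned C3) {
    return C3 >= 64 ? ~0ULL : (1ULL << C3) - 1;
  }

  int main() {
    const unsigned XLen = 64;
    const uint64_t X = 0x0123456789abcdefULL;
    for (unsigned C3 = 1; C3 < XLen; ++C3) {
      uint64_t C2 = maskOfTrailingOnes(C3);
      for (unsigned C = 0; C < C3; ++C) {
        uint64_t AndSrl = (X & C2) >> C;                             // old form
        uint64_t SlliSrli = (X << (XLen - C3)) >> ((XLen - C3) + C); // new form
        assert(AndSrl == SlliSrli);
      }
    }
    return 0;
  }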

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/alu8.ll
    llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
    llvm/test/CodeGen/RISCV/div-by-constant.ll
    llvm/test/CodeGen/RISCV/div.ll
    llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
    llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll
    llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5afae3265f6c0..a49f685f8fa46 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -542,35 +542,38 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     return;
   }
   case ISD::SRL: {
-    // Optimize (srl (and X, 0xffff), C) ->
-    //          (srli (slli X, (XLen-16), (XLen-16) + C)
-    // Taking into account that the 0xffff may have had lower bits unset by
-    // SimplifyDemandedBits. This avoids materializing the 0xffff immediate.
-    // This pattern occurs when type legalizing i16 right shifts.
-    // FIXME: This could be extended to other AND masks.
+    // Optimize (srl (and X, C2), C) ->
+    //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
+    // Where C2 is a mask with C3 trailing ones.
+    // Taking into account that the C2 may have had lower bits unset by
+    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+    // This pattern occurs when type legalizing right shifts for types with
+    // less than XLen bits.
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
-    if (N1C) {
-      uint64_t ShAmt = N1C->getZExtValue();
-      SDValue N0 = Node->getOperand(0);
-      if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
-          isa<ConstantSDNode>(N0.getOperand(1))) {
-        uint64_t Mask = N0.getConstantOperandVal(1);
-        Mask |= maskTrailingOnes<uint64_t>(ShAmt);
-        if (Mask == 0xffff) {
-          unsigned LShAmt = Subtarget->getXLen() - 16;
-          SDNode *SLLI =
-              CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
-                                     CurDAG->getTargetConstant(LShAmt, DL, VT));
-          SDNode *SRLI = CurDAG->getMachineNode(
-              RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
-              CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
-          ReplaceNode(Node, SRLI);
-          return;
-        }
-      }
-    }
-
-    break;
+    if (!N1C)
+      break;
+    SDValue N0 = Node->getOperand(0);
+    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+        !isa<ConstantSDNode>(N0.getOperand(1)))
+      break;
+    unsigned ShAmt = N1C->getZExtValue();
+    uint64_t Mask = N0.getConstantOperandVal(1);
+    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
+    if (!isMask_64(Mask))
+      break;
+    unsigned TrailingOnes = countTrailingOnes(Mask);
+    // 32 trailing ones should use srliw via tablegen pattern.
+    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
+      break;
+    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
+    SDNode *SLLI =
+        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+                               CurDAG->getTargetConstant(LShAmt, DL, VT));
+    SDNode *SRLI = CurDAG->getMachineNode(
+        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+    ReplaceNode(Node, SRLI);
+    return;
   }
   case ISD::SRA: {
     // Optimize (sra (sext_inreg X, i16), C) ->
@@ -587,7 +590,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     SDValue N0 = Node->getOperand(0);
     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
       break;
-    uint64_t ShAmt = N1C->getZExtValue();
+    unsigned ShAmt = N1C->getZExtValue();
     unsigned ExtSize =
         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
     // ExtSize of 32 should use sraiw via tablegen pattern.
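
Stripped of the SelectionDAG plumbing, the new SRL selection above boils down
to the decision sketched below. This is a standalone illustration; the helper
name and signature are invented here for exposition, not the backend's API. It
assumes XLen = 64 unless told otherwise:

  #include <bit>       // std::popcount (C++20)
  #include <cstdint>
  #include <optional>

  struct ShiftPair { unsigned SlliAmt, SrliAmt; };

  // Decide whether (srl (and X, Mask), ShAmt) can be rewritten as
  // (srli (slli X, SlliAmt), SrliAmt); returns std::nullopt otherwise.
  std::optional<ShiftPair> matchSrlOfAndMask(uint64_t Mask, unsigned ShAmt,
                                             unsigned XLen = 64) {
    // Bits below ShAmt are shifted out anyway, so treat them as ones. This
    // mirrors masks whose low bits SimplifyDemandedBits already cleared.
    Mask |= ShAmt >= 64 ? ~0ULL : (1ULL << ShAmt) - 1;
    // The mask must consist solely of trailing ones.
    if (Mask == 0 || (Mask & (Mask + 1)) != 0)
      return std::nullopt;
    unsigned TrailingOnes = std::popcount(Mask);
    // 32 trailing ones is left to the srliw tablegen pattern; a shift amount
    // >= TrailingOnes would leave no mask bits to extract.
    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
      return std::nullopt;
    unsigned LShAmt = XLen - TrailingOnes;
    return ShiftPair{LShAmt, LShAmt + ShAmt};
  }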

diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll
index 8611e752028d4..dafa328450d9d 100644
--- a/llvm/test/CodeGen/RISCV/alu8.ll
+++ b/llvm/test/CodeGen/RISCV/alu8.ll
@@ -135,14 +135,14 @@ define i8 @slli(i8 %a) nounwind {
 define i8 @srli(i8 %a) nounwind {
 ; RV32I-LABEL: srli:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 192
-; RV32I-NEXT:    srli a0, a0, 6
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srli a0, a0, 30
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: srli:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 192
-; RV64I-NEXT:    srli a0, a0, 6
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srli a0, a0, 62
 ; RV64I-NEXT:    ret
   %1 = lshr i8 %a, 6
   ret i8 %1
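
The new output above can be reproduced by hand with the sketch shown after the
RISCVISelDAGToDAG.cpp hunk: lshr i8 %a, 6 reaches selection with Mask = 0xc0
and ShAmt = 6; OR-ing in the 6 low bits gives 0xff, a mask of 8 trailing ones,
hence slli by 64 - 8 = 56 and srli by 56 + 6 = 62 (24 and 30 on RV32, where
XLen = 32). A hypothetical usage example:

  auto RV64 = matchSrlOfAndMask(/*Mask=*/0xc0, /*ShAmt=*/6);   // {56, 62}
  auto RV32 = matchSrlOfAndMask(0xc0, 6, /*XLen=*/32);         // {24, 30}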

diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index e7be4070fe023..74c2357fe7004 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -212,10 +212,8 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    slli a1, a0, 8
 ; RV32I-NEXT:    add a0, a1, a0
-; RV32I-NEXT:    lui a1, 2
-; RV32I-NEXT:    addi a1, a1, -256
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    slli a0, a0, 19
+; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB4_2:
 ; RV32I-NEXT:    li a0, 16
@@ -247,12 +245,10 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    lui a1, 1
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    add a0, a1, a0
-; RV64I-NEXT:    lui a1, 2
-; RV64I-NEXT:    addiw a1, a1, -256
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    slliw a1, a0, 8
+; RV64I-NEXT:    addw a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 51
+; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
 ; RV64I-NEXT:  .LBB4_2:
 ; RV64I-NEXT:    li a0, 16
@@ -605,10 +601,8 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    slli a1, a0, 8
 ; RV32I-NEXT:    add a0, a1, a0
-; RV32I-NEXT:    lui a1, 2
-; RV32I-NEXT:    addi a1, a1, -256
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    slli a0, a0, 19
+; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_cttz_i16_zero_undef:
@@ -632,12 +626,10 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV64I-NEXT:    lui a1, 1
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    add a0, a1, a0
-; RV64I-NEXT:    lui a1, 2
-; RV64I-NEXT:    addiw a1, a1, -256
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    slliw a1, a0, 8
+; RV64I-NEXT:    addw a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 51
+; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
   ret i16 %tmp

diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 2f13b18d0ac3c..f20ff9b6d4f94 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -163,8 +163,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV32IM-NEXT:    mul a1, a1, a2
 ; RV32IM-NEXT:    srli a1, a1, 8
 ; RV32IM-NEXT:    sub a0, a0, a1
-; RV32IM-NEXT:    andi a0, a0, 254
-; RV32IM-NEXT:    srli a0, a0, 1
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    srli a0, a0, 25
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    srli a0, a0, 2
 ; RV32IM-NEXT:    ret
@@ -176,8 +176,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV32IMZB-NEXT:    sh2add a1, a2, a1
 ; RV32IMZB-NEXT:    srli a1, a1, 8
 ; RV32IMZB-NEXT:    sub a0, a0, a1
-; RV32IMZB-NEXT:    andi a0, a0, 254
-; RV32IMZB-NEXT:    srli a0, a0, 1
+; RV32IMZB-NEXT:    slli a0, a0, 24
+; RV32IMZB-NEXT:    srli a0, a0, 25
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    srli a0, a0, 2
 ; RV32IMZB-NEXT:    ret
@@ -189,8 +189,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 8
 ; RV64IM-NEXT:    subw a0, a0, a1
-; RV64IM-NEXT:    andi a0, a0, 254
-; RV64IM-NEXT:    srli a0, a0, 1
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    srli a0, a0, 57
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    srli a0, a0, 2
 ; RV64IM-NEXT:    ret
@@ -202,8 +202,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
 ; RV64IMZB-NEXT:    sh2add a1, a2, a1
 ; RV64IMZB-NEXT:    srli a1, a1, 8
 ; RV64IMZB-NEXT:    subw a0, a0, a1
-; RV64IMZB-NEXT:    andi a0, a0, 254
-; RV64IMZB-NEXT:    srli a0, a0, 1
+; RV64IMZB-NEXT:    slli a0, a0, 56
+; RV64IMZB-NEXT:    srli a0, a0, 57
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    srli a0, a0, 2
 ; RV64IMZB-NEXT:    ret
@@ -618,8 +618,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
   ret i8 %1
 }
 
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
 define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IM-LABEL: sdiv8_constant_add_srai:
 ; RV32IM:       # %bb.0:
@@ -629,9 +627,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IM-NEXT:    mul a1, a1, a2
 ; RV32IM-NEXT:    srli a1, a1, 8
 ; RV32IM-NEXT:    add a0, a1, a0
-; RV32IM-NEXT:    andi a1, a0, 128
-; RV32IM-NEXT:    srli a1, a1, 7
 ; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    srli a1, a0, 31
 ; RV32IM-NEXT:    srai a0, a0, 26
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    ret
@@ -643,9 +640,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT:    mul a1, a1, a2
 ; RV32IMZB-NEXT:    srli a1, a1, 8
 ; RV32IMZB-NEXT:    add a0, a1, a0
-; RV32IMZB-NEXT:    andi a1, a0, 128
-; RV32IMZB-NEXT:    srli a1, a1, 7
 ; RV32IMZB-NEXT:    slli a0, a0, 24
+; RV32IMZB-NEXT:    srli a1, a0, 31
 ; RV32IMZB-NEXT:    srai a0, a0, 26
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
@@ -658,9 +654,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 8
 ; RV64IM-NEXT:    addw a0, a1, a0
-; RV64IM-NEXT:    andi a1, a0, 128
-; RV64IM-NEXT:    srli a1, a1, 7
 ; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    srli a1, a0, 63
 ; RV64IM-NEXT:    srai a0, a0, 58
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
@@ -672,9 +667,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT:    mul a1, a1, a2
 ; RV64IMZB-NEXT:    srli a1, a1, 8
 ; RV64IMZB-NEXT:    addw a0, a1, a0
-; RV64IMZB-NEXT:    andi a1, a0, 128
-; RV64IMZB-NEXT:    srli a1, a1, 7
 ; RV64IMZB-NEXT:    slli a0, a0, 56
+; RV64IMZB-NEXT:    srli a1, a0, 63
 ; RV64IMZB-NEXT:    srai a0, a0, 58
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
@@ -682,8 +676,6 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
   ret i8 %1
 }
 
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
 define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IM-LABEL: sdiv8_constant_sub_srai:
 ; RV32IM:       # %bb.0:
@@ -693,9 +685,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IM-NEXT:    mul a1, a1, a2
 ; RV32IM-NEXT:    srli a1, a1, 8
 ; RV32IM-NEXT:    sub a0, a1, a0
-; RV32IM-NEXT:    andi a1, a0, 128
-; RV32IM-NEXT:    srli a1, a1, 7
 ; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    srli a1, a0, 31
 ; RV32IM-NEXT:    srai a0, a0, 26
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    ret
@@ -707,9 +698,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT:    mul a1, a1, a2
 ; RV32IMZB-NEXT:    srli a1, a1, 8
 ; RV32IMZB-NEXT:    sub a0, a1, a0
-; RV32IMZB-NEXT:    andi a1, a0, 128
-; RV32IMZB-NEXT:    srli a1, a1, 7
 ; RV32IMZB-NEXT:    slli a0, a0, 24
+; RV32IMZB-NEXT:    srli a1, a0, 31
 ; RV32IMZB-NEXT:    srai a0, a0, 26
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
@@ -722,9 +712,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 8
 ; RV64IM-NEXT:    subw a0, a1, a0
-; RV64IM-NEXT:    andi a1, a0, 128
-; RV64IM-NEXT:    srli a1, a1, 7
 ; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    srli a1, a0, 63
 ; RV64IM-NEXT:    srai a0, a0, 58
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
@@ -736,9 +725,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT:    mul a1, a1, a2
 ; RV64IMZB-NEXT:    srli a1, a1, 8
 ; RV64IMZB-NEXT:    subw a0, a1, a0
-; RV64IMZB-NEXT:    andi a1, a0, 128
-; RV64IMZB-NEXT:    srli a1, a1, 7
 ; RV64IMZB-NEXT:    slli a0, a0, 56
+; RV64IMZB-NEXT:    srli a1, a0, 63
 ; RV64IMZB-NEXT:    srai a0, a0, 58
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
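
The sdiv8_constant_add_srai/sdiv8_constant_sub_srai changes above show the CSE
mentioned in the commit message: the sign-bit extract (srl (and X, 128), 7) is
now selected as (srli (slli X, 24), 31) on RV32 (56/63 on RV64), and that slli
is the same node the following srai already needs, so the old andi/srli pair
collapses into a single srli of the shared slli result. This is also why the
two FIXME comments could be dropped. Traced with the same hypothetical helper:

  auto RV32Sign = matchSrlOfAndMask(/*Mask=*/0x80, /*ShAmt=*/7, /*XLen=*/32); // {24, 31}
  auto RV64Sign = matchSrlOfAndMask(0x80, 7);                                 // {56, 63}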

diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index af6855e94ff6c..3d4db5bbeb699 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -363,26 +363,26 @@ define i8 @udiv8_constant(i8 %a) nounwind {
 define i8 @udiv8_pow2(i8 %a) nounwind {
 ; RV32I-LABEL: udiv8_pow2:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 248
-; RV32I-NEXT:    srli a0, a0, 3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: udiv8_pow2:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    andi a0, a0, 248
-; RV32IM-NEXT:    srli a0, a0, 3
+; RV32IM-NEXT:    slli a0, a0, 24
+; RV32IM-NEXT:    srli a0, a0, 27
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: udiv8_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 248
-; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: udiv8_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    andi a0, a0, 248
-; RV64IM-NEXT:    srli a0, a0, 3
+; RV64IM-NEXT:    slli a0, a0, 56
+; RV64IM-NEXT:    srli a0, a0, 59
 ; RV64IM-NEXT:    ret
   %1 = udiv i8 %a, 8
   ret i8 %1

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index a4fda68ba0219..f9cd53bdf965a 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -473,20 +473,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
 define i8 @srli_i8(i8 %a) nounwind {
 ; RV32I-LABEL: srli_i8:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 192
-; RV32I-NEXT:    srli a0, a0, 6
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srli a0, a0, 30
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: srli_i8:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andi a0, a0, 192
-; RV32ZBB-NEXT:    srli a0, a0, 6
+; RV32ZBB-NEXT:    slli a0, a0, 24
+; RV32ZBB-NEXT:    srli a0, a0, 30
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: srli_i8:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andi a0, a0, 192
-; RV32ZBP-NEXT:    srli a0, a0, 6
+; RV32ZBP-NEXT:    slli a0, a0, 24
+; RV32ZBP-NEXT:    srli a0, a0, 30
 ; RV32ZBP-NEXT:    ret
   %1 = lshr i8 %a, 6
   ret i8 %1

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index 79a91979f2bd8..97093ea0a0529 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -544,20 +544,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
 define i8 @srli_i8(i8 %a) nounwind {
 ; RV64I-LABEL: srli_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 192
-; RV64I-NEXT:    srli a0, a0, 6
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srli a0, a0, 62
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: srli_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andi a0, a0, 192
-; RV64ZBB-NEXT:    srli a0, a0, 6
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srli a0, a0, 62
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: srli_i8:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andi a0, a0, 192
-; RV64ZBP-NEXT:    srli a0, a0, 6
+; RV64ZBP-NEXT:    slli a0, a0, 56
+; RV64ZBP-NEXT:    srli a0, a0, 62
 ; RV64ZBP-NEXT:    ret
   %1 = lshr i8 %a, 6
   ret i8 %1

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 0d892e6508147..952d4c794275f 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -264,9 +264,8 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 8
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    lui a1, 524272
-; RV64I-NEXT:    and a1, a0, a1
-; RV64I-NEXT:    srli a1, a1, 16
+; RV64I-NEXT:    slli a1, a0, 33
+; RV64I-NEXT:    srli a1, a1, 49
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    srli a1, a0, 1

diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 24a7c78d26667..810fee3464a46 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -380,14 +380,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ;
 ; RV64-LABEL: test_srem_vec:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -64
-; RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -48
+; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    mv s0, a0
 ; RV64-NEXT:    lb a0, 12(a0)
 ; RV64-NEXT:    lwu a1, 8(s0)
@@ -407,7 +406,6 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT:    slli a1, a1, 31
 ; RV64-NEXT:    srai s2, a1, 31
 ; RV64-NEXT:    li a1, 7
-; RV64-NEXT:    li s5, 7
 ; RV64-NEXT:    call __moddi3@plt
 ; RV64-NEXT:    mv s3, a0
 ; RV64-NEXT:    li a1, -5
@@ -432,9 +430,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    neg a2, a2
 ; RV64-NEXT:    neg a3, a1
-; RV64-NEXT:    slli a4, s5, 32
-; RV64-NEXT:    and a3, a3, a4
-; RV64-NEXT:    srli a3, a3, 32
+; RV64-NEXT:    slli a3, a3, 29
+; RV64-NEXT:    srli a3, a3, 61
 ; RV64-NEXT:    sb a3, 12(s0)
 ; RV64-NEXT:    slliw a1, a1, 2
 ; RV64-NEXT:    srli a3, s4, 31
@@ -446,14 +443,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT:    slli a1, a2, 33
 ; RV64-NEXT:    or a0, a0, a1
 ; RV64-NEXT:    sd a0, 0(s0)
-; RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 64
+; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 48
 ; RV64-NEXT:    ret
 ;
 ; RV32M-LABEL: test_srem_vec:
@@ -592,10 +588,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64M-NEXT:    neg a1, a1
 ; RV64M-NEXT:    neg a4, a2
 ; RV64M-NEXT:    neg a3, a3
-; RV64M-NEXT:    li a5, 7
-; RV64M-NEXT:    slli a5, a5, 32
-; RV64M-NEXT:    and a4, a4, a5
-; RV64M-NEXT:    srli a4, a4, 32
+; RV64M-NEXT:    slli a4, a4, 29
+; RV64M-NEXT:    srli a4, a4, 61
 ; RV64M-NEXT:    sb a4, 12(a0)
 ; RV64M-NEXT:    slliw a2, a2, 2
 ; RV64M-NEXT:    srli a4, a6, 31
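
The test_srem_vec change above exercises the case where the AND mask is not
itself a trailing-ones mask: 7 << 32 only becomes one after the 32 bits the
shift discards are treated as ones (0x700000000 | 0xffffffff = 0x7ffffffff,
35 trailing ones), giving slli by 64 - 35 = 29 and srli by 29 + 32 = 61. As a
side effect the 'li s5, 7' materialization and its spill slot disappear,
shrinking the stack frame from 64 to 48 bytes. With the same hypothetical
helper:

  auto Wide = matchSrlOfAndMask(/*Mask=*/0x700000000ULL, /*ShAmt=*/32); // {29, 61}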

diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 8c3870ee4070f..a7c2cdf122484 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -94,12 +94,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32-NEXT:    addi a1, a1, -585
 ; RV32-NEXT:    call __mulsi3@plt
 ; RV32-NEXT:    slli a1, a0, 26
-; RV32-NEXT:    lui a2, 32768
-; RV32-NEXT:    addi a3, a2, -2
-; RV32-NEXT:    and a0, a0, a3
-; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    srli a0, a0, 6
 ; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    addi a1, a2, -1
+; RV32-NEXT:    lui a1, 32768
+; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    lui a1, 2341
 ; RV32-NEXT:    addi a1, a1, -1755
@@ -116,12 +115,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64-NEXT:    addiw a1, a1, -585
 ; RV64-NEXT:    call __muldi3@plt
 ; RV64-NEXT:    slli a1, a0, 26
-; RV64-NEXT:    lui a2, 32768
-; RV64-NEXT:    addiw a3, a2, -2
-; RV64-NEXT:    and a0, a0, a3
-; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    slli a0, a0, 37
+; RV64-NEXT:    srli a0, a0, 38
 ; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    addiw a1, a2, -1
+; RV64-NEXT:    lui a1, 32768
+; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    lui a1, 2341
 ; RV64-NEXT:    addiw a1, a1, -1755
@@ -136,12 +134,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32M-NEXT:    addi a1, a1, -585
 ; RV32M-NEXT:    mul a0, a0, a1
 ; RV32M-NEXT:    slli a1, a0, 26
-; RV32M-NEXT:    lui a2, 32768
-; RV32M-NEXT:    addi a3, a2, -2
-; RV32M-NEXT:    and a0, a0, a3
-; RV32M-NEXT:    srli a0, a0, 1
+; RV32M-NEXT:    slli a0, a0, 5
+; RV32M-NEXT:    srli a0, a0, 6
 ; RV32M-NEXT:    or a0, a0, a1
-; RV32M-NEXT:    addi a1, a2, -1
+; RV32M-NEXT:    lui a1, 32768
+; RV32M-NEXT:    addi a1, a1, -1
 ; RV32M-NEXT:    and a0, a0, a1
 ; RV32M-NEXT:    lui a1, 2341
 ; RV32M-NEXT:    addi a1, a1, -1755
@@ -154,12 +151,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64M-NEXT:    addiw a1, a1, -585
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    slli a1, a0, 26
-; RV64M-NEXT:    lui a2, 32768
-; RV64M-NEXT:    addiw a3, a2, -2
-; RV64M-NEXT:    and a0, a0, a3
-; RV64M-NEXT:    srli a0, a0, 1
+; RV64M-NEXT:    slli a0, a0, 37
+; RV64M-NEXT:    srli a0, a0, 38
 ; RV64M-NEXT:    or a0, a0, a1
-; RV64M-NEXT:    addiw a1, a2, -1
+; RV64M-NEXT:    lui a1, 32768
+; RV64M-NEXT:    addiw a1, a1, -1
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 2341
 ; RV64M-NEXT:    addiw a1, a1, -1755
@@ -172,12 +168,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV32MV-NEXT:    addi a1, a1, -585
 ; RV32MV-NEXT:    mul a0, a0, a1
 ; RV32MV-NEXT:    slli a1, a0, 26
-; RV32MV-NEXT:    lui a2, 32768
-; RV32MV-NEXT:    addi a3, a2, -2
-; RV32MV-NEXT:    and a0, a0, a3
-; RV32MV-NEXT:    srli a0, a0, 1
+; RV32MV-NEXT:    slli a0, a0, 5
+; RV32MV-NEXT:    srli a0, a0, 6
 ; RV32MV-NEXT:    or a0, a0, a1
-; RV32MV-NEXT:    addi a1, a2, -1
+; RV32MV-NEXT:    lui a1, 32768
+; RV32MV-NEXT:    addi a1, a1, -1
 ; RV32MV-NEXT:    and a0, a0, a1
 ; RV32MV-NEXT:    lui a1, 2341
 ; RV32MV-NEXT:    addi a1, a1, -1755
@@ -190,12 +185,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64MV-NEXT:    addiw a1, a1, -585
 ; RV64MV-NEXT:    mul a0, a0, a1
 ; RV64MV-NEXT:    slli a1, a0, 26
-; RV64MV-NEXT:    lui a2, 32768
-; RV64MV-NEXT:    addiw a3, a2, -2
-; RV64MV-NEXT:    and a0, a0, a3
-; RV64MV-NEXT:    srli a0, a0, 1
+; RV64MV-NEXT:    slli a0, a0, 37
+; RV64MV-NEXT:    srli a0, a0, 38
 ; RV64MV-NEXT:    or a0, a0, a1
-; RV64MV-NEXT:    addiw a1, a2, -1
+; RV64MV-NEXT:    lui a1, 32768
+; RV64MV-NEXT:    addiw a1, a1, -1
 ; RV64MV-NEXT:    and a0, a0, a1
 ; RV64MV-NEXT:    lui a1, 2341
 ; RV64MV-NEXT:    addiw a1, a1, -1755
@@ -358,8 +352,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32-NEXT:    li a1, 683
 ; RV32-NEXT:    call __mulsi3@plt
 ; RV32-NEXT:    slli a1, a0, 10
-; RV32-NEXT:    andi a0, a0, 2046
-; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    slli a0, a0, 21
+; RV32-NEXT:    srli a0, a0, 22
 ; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    andi a0, a0, 2047
 ; RV32-NEXT:    li a1, 341
@@ -418,8 +412,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    li a1, 683
 ; RV64-NEXT:    call __muldi3@plt
 ; RV64-NEXT:    slli a1, a0, 10
-; RV64-NEXT:    andi a0, a0, 2046
-; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    slli a0, a0, 53
+; RV64-NEXT:    srli a0, a0, 54
 ; RV64-NEXT:    or a0, a0, a1
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    li a1, 341
@@ -447,10 +441,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    slli a1, s1, 22
 ; RV64-NEXT:    sub a0, a0, a1
 ; RV64-NEXT:    sw a0, 0(s0)
-; RV64-NEXT:    li a1, -1
-; RV64-NEXT:    srli a1, a1, 31
-; RV64-NEXT:    and a0, a0, a1
-; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    slli a0, a0, 31
+; RV64-NEXT:    srli a0, a0, 63
 ; RV64-NEXT:    sb a0, 4(s0)
 ; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
@@ -472,8 +464,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32M-NEXT:    li a4, 683
 ; RV32M-NEXT:    mul a2, a2, a4
 ; RV32M-NEXT:    slli a4, a2, 10
-; RV32M-NEXT:    andi a2, a2, 2046
-; RV32M-NEXT:    srli a2, a2, 1
+; RV32M-NEXT:    slli a2, a2, 21
+; RV32M-NEXT:    srli a2, a2, 22
 ; RV32M-NEXT:    or a2, a2, a4
 ; RV32M-NEXT:    andi a2, a2, 2047
 ; RV32M-NEXT:    li a4, 341
@@ -517,8 +509,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64M-NEXT:    li a4, 683
 ; RV64M-NEXT:    mul a1, a1, a4
 ; RV64M-NEXT:    slli a4, a1, 10
-; RV64M-NEXT:    andi a1, a1, 2046
-; RV64M-NEXT:    srli a1, a1, 1
+; RV64M-NEXT:    slli a1, a1, 53
+; RV64M-NEXT:    srli a1, a1, 54
 ; RV64M-NEXT:    or a1, a1, a4
 ; RV64M-NEXT:    andi a1, a1, 2047
 ; RV64M-NEXT:    li a4, 341
@@ -544,10 +536,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64M-NEXT:    slli a2, a3, 22
 ; RV64M-NEXT:    sub a1, a1, a2
 ; RV64M-NEXT:    sw a1, 0(a0)
-; RV64M-NEXT:    li a2, -1
-; RV64M-NEXT:    srli a2, a2, 31
-; RV64M-NEXT:    and a1, a1, a2
-; RV64M-NEXT:    srli a1, a1, 32
+; RV64M-NEXT:    slli a1, a1, 31
+; RV64M-NEXT:    srli a1, a1, 63
 ; RV64M-NEXT:    sb a1, 4(a0)
 ; RV64M-NEXT:    ret
 ;
@@ -676,10 +666,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64MV-NEXT:    slli a2, a2, 22
 ; RV64MV-NEXT:    or a1, a1, a2
 ; RV64MV-NEXT:    sw a1, 0(a0)
-; RV64MV-NEXT:    li a2, -1
-; RV64MV-NEXT:    srli a2, a2, 31
-; RV64MV-NEXT:    and a1, a1, a2
-; RV64MV-NEXT:    srli a1, a1, 32
+; RV64MV-NEXT:    slli a1, a1, 31
+; RV64MV-NEXT:    srli a1, a1, 63
 ; RV64MV-NEXT:    sb a1, 4(a0)
 ; RV64MV-NEXT:    addi sp, sp, 16
 ; RV64MV-NEXT:    ret


        

