[llvm] c45c1b1 - [RISCV] Teach RISCVDAGToDAGISel::selectShiftMask to replace sub from constant with neg.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 9 12:36:58 PST 2022


Author: Craig Topper
Date: 2022-02-09T12:33:01-08:00
New Revision: c45c1b130b5cdb16426f5f7758a8518ea449ff69

URL: https://github.com/llvm/llvm-project/commit/c45c1b130b5cdb16426f5f7758a8518ea449ff69
DIFF: https://github.com/llvm/llvm-project/commit/c45c1b130b5cdb16426f5f7758a8518ea449ff69.diff

LOG: [RISCV] Teach RISCVDAGToDAGISel::selectShiftMask to replace sub from constant with neg.

If the shift amount is (sub C, X) where C is 0 modulo the size of
the shift, we can replace it with neg or negw.
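
For example, the 32-bit rotate pattern computes its second shift amount as
(sub 32, %y). RV32 SLL/SRL read only the low 5 bits of the shift amount, and
32 is 0 modulo 32, so (32 - y) and (0 - y) select the same shift; the LI/SUB
pair can therefore become a single NEG. A sketch in the shape of the rotl_32
test updated below (illustrative only, not a new test added by this patch):

    define i32 @rotl_32_example(i32 %x, i32 %y) nounwind {
      %z = sub i32 32, %y
      %b = shl i32 %x, %y
      %c = lshr i32 %x, %z
      %d = or i32 %b, %c
      ret i32 %d
    }

    ; RV32I before this patch:
    ;   li   a2, 32
    ;   sub  a2, a2, a1
    ;   sll  a1, a0, a1
    ;   srl  a0, a0, a2
    ;   or   a0, a1, a0
    ;
    ; RV32I after this patch:
    ;   neg  a2, a1
    ;   sll  a1, a0, a1
    ;   srl  a0, a0, a2
    ;   or   a0, a1, a0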

A similar transform is done for AArch64 and X86.

Reviewed By: khchen

Differential Revision: https://reviews.llvm.org/D119089

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/rotl-rotr.ll
    llvm/test/CodeGen/RISCV/shifts.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 520f593341190..3497dd24fd5c7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1737,6 +1737,22 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
       ShAmt = N.getOperand(0);
       return true;
     }
+  } else if (N.getOpcode() == ISD::SUB &&
+             isa<ConstantSDNode>(N.getOperand(0))) {
+    uint64_t Imm = N.getConstantOperandVal(0);
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    if (Imm != 0 && Imm % ShiftWidth == 0) {
+      SDLoc DL(N);
+      EVT VT = N.getValueType();
+      SDValue Zero =
+          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
+      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+                                                  N.getOperand(1));
+      ShAmt = SDValue(Neg, 0);
+      return true;
+    }
   }
 
   ShAmt = N;
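
Aside (illustration, not part of the patch): the guard checks
Imm % ShiftWidth == 0 rather than Imm == ShiftWidth, so a constant such as 64
also folds for a 32-bit shift. That is what lets the (64 - amt) computations
in the i128 shift expansions in shifts.ll below become plain negations. A
quick check of the modular arithmetic for ShiftWidth = 32 and amt = 5:

    (64 - 5) mod 32 = 59 mod 32 = 27
    (0  - 5) mod 32 = -5 mod 32 = 27

The hardware shift reads only the low 5 bits of the amount, so both forms
select the same shift. On RV64 the negation is emitted as SUBW from x0 (the
negw alias seen in the RV64I checks), which is sufficient because only the
low bits of the shift amount are consumed.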

diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index 75943edf4187e..aace4ac34216d 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -17,8 +17,7 @@
 define i32 @rotl_32(i32 %x, i32 %y) nounwind {
 ; RV32I-LABEL: rotl_32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 32
-; RV32I-NEXT:    sub a2, a2, a1
+; RV32I-NEXT:    neg a2, a1
 ; RV32I-NEXT:    sll a1, a0, a1
 ; RV32I-NEXT:    srl a0, a0, a2
 ; RV32I-NEXT:    or a0, a1, a0
@@ -26,8 +25,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: rotl_32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 32
-; RV64I-NEXT:    subw a2, a2, a1
+; RV64I-NEXT:    negw a2, a1
 ; RV64I-NEXT:    sllw a1, a0, a1
 ; RV64I-NEXT:    srlw a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
@@ -52,8 +50,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
 define i32 @rotr_32(i32 %x, i32 %y) nounwind {
 ; RV32I-LABEL: rotr_32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 32
-; RV32I-NEXT:    sub a2, a2, a1
+; RV32I-NEXT:    neg a2, a1
 ; RV32I-NEXT:    srl a1, a0, a1
 ; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    or a0, a1, a0
@@ -61,8 +58,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: rotr_32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 32
-; RV64I-NEXT:    subw a2, a2, a1
+; RV64I-NEXT:    negw a2, a1
 ; RV64I-NEXT:    srlw a1, a0, a1
 ; RV64I-NEXT:    sllw a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
@@ -89,47 +85,48 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    addi a5, a2, -32
-; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    li a6, 31
 ; RV32I-NEXT:    bltz a5, .LBB2_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sll a1, a0, a5
 ; RV32I-NEXT:    j .LBB2_3
 ; RV32I-NEXT:  .LBB2_2:
 ; RV32I-NEXT:    sll a1, a3, a2
-; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    sub a4, a6, a2
 ; RV32I-NEXT:    srli a7, a0, 1
-; RV32I-NEXT:    srl a6, a7, a6
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    srl a4, a7, a4
+; RV32I-NEXT:    or a1, a1, a4
 ; RV32I-NEXT:  .LBB2_3:
-; RV32I-NEXT:    li a6, 32
-; RV32I-NEXT:    sub a6, a6, a2
-; RV32I-NEXT:    bltz a6, .LBB2_5
+; RV32I-NEXT:    neg a7, a2
+; RV32I-NEXT:    li a4, 32
+; RV32I-NEXT:    sub t0, a4, a2
+; RV32I-NEXT:    srl a4, a3, a7
+; RV32I-NEXT:    bltz t0, .LBB2_6
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    srl a4, a3, a6
-; RV32I-NEXT:    bltz a5, .LBB2_6
-; RV32I-NEXT:    j .LBB2_7
+; RV32I-NEXT:    bltz a5, .LBB2_7
 ; RV32I-NEXT:  .LBB2_5:
-; RV32I-NEXT:    li a6, 64
-; RV32I-NEXT:    sub a6, a6, a2
-; RV32I-NEXT:    srl a7, a0, a6
-; RV32I-NEXT:    sub a4, a4, a6
-; RV32I-NEXT:    slli t0, a3, 1
-; RV32I-NEXT:    sll a4, t0, a4
-; RV32I-NEXT:    or a4, a7, a4
-; RV32I-NEXT:    srl a3, a3, a6
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    bgez a5, .LBB2_7
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB2_6:
+; RV32I-NEXT:    srl a7, a0, a7
+; RV32I-NEXT:    li t0, 64
+; RV32I-NEXT:    sub t0, t0, a2
+; RV32I-NEXT:    sub a6, a6, t0
+; RV32I-NEXT:    slli a3, a3, 1
+; RV32I-NEXT:    sll a3, a3, a6
+; RV32I-NEXT:    or a3, a7, a3
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    bgez a5, .LBB2_5
+; RV32I-NEXT:  .LBB2_7:
 ; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    or a4, a4, a0
-; RV32I-NEXT:  .LBB2_7:
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: rotl_64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 64
-; RV64I-NEXT:    sub a2, a2, a1
+; RV64I-NEXT:    negw a2, a1
 ; RV64I-NEXT:    sll a1, a0, a1
 ; RV64I-NEXT:    srl a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
@@ -139,40 +136,42 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; RV32ZBB:       # %bb.0:
 ; RV32ZBB-NEXT:    mv a3, a1
 ; RV32ZBB-NEXT:    addi a5, a2, -32
-; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    li a6, 31
 ; RV32ZBB-NEXT:    bltz a5, .LBB2_2
 ; RV32ZBB-NEXT:  # %bb.1:
 ; RV32ZBB-NEXT:    sll a1, a0, a5
 ; RV32ZBB-NEXT:    j .LBB2_3
 ; RV32ZBB-NEXT:  .LBB2_2:
 ; RV32ZBB-NEXT:    sll a1, a3, a2
-; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    sub a4, a6, a2
 ; RV32ZBB-NEXT:    srli a7, a0, 1
-; RV32ZBB-NEXT:    srl a6, a7, a6
-; RV32ZBB-NEXT:    or a1, a1, a6
+; RV32ZBB-NEXT:    srl a4, a7, a4
+; RV32ZBB-NEXT:    or a1, a1, a4
 ; RV32ZBB-NEXT:  .LBB2_3:
-; RV32ZBB-NEXT:    li a6, 32
-; RV32ZBB-NEXT:    sub a6, a6, a2
-; RV32ZBB-NEXT:    bltz a6, .LBB2_5
+; RV32ZBB-NEXT:    neg a7, a2
+; RV32ZBB-NEXT:    li a4, 32
+; RV32ZBB-NEXT:    sub t0, a4, a2
+; RV32ZBB-NEXT:    srl a4, a3, a7
+; RV32ZBB-NEXT:    bltz t0, .LBB2_6
 ; RV32ZBB-NEXT:  # %bb.4:
-; RV32ZBB-NEXT:    srl a4, a3, a6
-; RV32ZBB-NEXT:    bltz a5, .LBB2_6
-; RV32ZBB-NEXT:    j .LBB2_7
+; RV32ZBB-NEXT:    bltz a5, .LBB2_7
 ; RV32ZBB-NEXT:  .LBB2_5:
-; RV32ZBB-NEXT:    li a6, 64
-; RV32ZBB-NEXT:    sub a6, a6, a2
-; RV32ZBB-NEXT:    srl a7, a0, a6
-; RV32ZBB-NEXT:    sub a4, a4, a6
-; RV32ZBB-NEXT:    slli t0, a3, 1
-; RV32ZBB-NEXT:    sll a4, t0, a4
-; RV32ZBB-NEXT:    or a4, a7, a4
-; RV32ZBB-NEXT:    srl a3, a3, a6
-; RV32ZBB-NEXT:    or a1, a1, a3
-; RV32ZBB-NEXT:    bgez a5, .LBB2_7
+; RV32ZBB-NEXT:    mv a0, a4
+; RV32ZBB-NEXT:    ret
 ; RV32ZBB-NEXT:  .LBB2_6:
+; RV32ZBB-NEXT:    srl a7, a0, a7
+; RV32ZBB-NEXT:    li t0, 64
+; RV32ZBB-NEXT:    sub t0, t0, a2
+; RV32ZBB-NEXT:    sub a6, a6, t0
+; RV32ZBB-NEXT:    slli a3, a3, 1
+; RV32ZBB-NEXT:    sll a3, a3, a6
+; RV32ZBB-NEXT:    or a3, a7, a3
+; RV32ZBB-NEXT:    or a1, a1, a4
+; RV32ZBB-NEXT:    mv a4, a3
+; RV32ZBB-NEXT:    bgez a5, .LBB2_5
+; RV32ZBB-NEXT:  .LBB2_7:
 ; RV32ZBB-NEXT:    sll a0, a0, a2
 ; RV32ZBB-NEXT:    or a4, a4, a0
-; RV32ZBB-NEXT:  .LBB2_7:
 ; RV32ZBB-NEXT:    mv a0, a4
 ; RV32ZBB-NEXT:    ret
 ;
@@ -190,49 +189,50 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: rotr_64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    addi a5, a2, -32
-; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    li a6, 31
 ; RV32I-NEXT:    bltz a5, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srl a0, a1, a5
 ; RV32I-NEXT:    j .LBB3_3
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srl a0, a3, a2
-; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    srl a0, a4, a2
+; RV32I-NEXT:    sub a3, a6, a2
 ; RV32I-NEXT:    slli a7, a1, 1
-; RV32I-NEXT:    sll a6, a7, a6
-; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    sll a3, a7, a3
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:  .LBB3_3:
-; RV32I-NEXT:    li a6, 32
-; RV32I-NEXT:    sub a6, a6, a2
-; RV32I-NEXT:    bltz a6, .LBB3_5
+; RV32I-NEXT:    neg a7, a2
+; RV32I-NEXT:    li a3, 32
+; RV32I-NEXT:    sub t0, a3, a2
+; RV32I-NEXT:    sll a3, a4, a7
+; RV32I-NEXT:    bltz t0, .LBB3_6
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    sll a4, a3, a6
-; RV32I-NEXT:    bltz a5, .LBB3_6
-; RV32I-NEXT:    j .LBB3_7
+; RV32I-NEXT:    bltz a5, .LBB3_7
 ; RV32I-NEXT:  .LBB3_5:
-; RV32I-NEXT:    li a6, 64
-; RV32I-NEXT:    sub a6, a6, a2
-; RV32I-NEXT:    sll a7, a1, a6
-; RV32I-NEXT:    sub a4, a4, a6
-; RV32I-NEXT:    srli t0, a3, 1
-; RV32I-NEXT:    srl a4, t0, a4
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB3_6:
+; RV32I-NEXT:    sll a7, a1, a7
+; RV32I-NEXT:    li t0, 64
+; RV32I-NEXT:    sub t0, t0, a2
+; RV32I-NEXT:    sub a6, a6, t0
+; RV32I-NEXT:    srli a4, a4, 1
+; RV32I-NEXT:    srl a4, a4, a6
 ; RV32I-NEXT:    or a4, a7, a4
-; RV32I-NEXT:    sll a3, a3, a6
 ; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    bgez a5, .LBB3_7
-; RV32I-NEXT:  .LBB3_6:
-; RV32I-NEXT:    srl a1, a1, a2
-; RV32I-NEXT:    or a4, a4, a1
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    bgez a5, .LBB3_5
 ; RV32I-NEXT:  .LBB3_7:
-; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a3, a3, a1
+; RV32I-NEXT:    mv a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: rotr_64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 64
-; RV64I-NEXT:    sub a2, a2, a1
+; RV64I-NEXT:    negw a2, a1
 ; RV64I-NEXT:    srl a1, a0, a1
 ; RV64I-NEXT:    sll a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
@@ -240,43 +240,45 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ;
 ; RV32ZBB-LABEL: rotr_64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    mv a4, a0
 ; RV32ZBB-NEXT:    addi a5, a2, -32
-; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    li a6, 31
 ; RV32ZBB-NEXT:    bltz a5, .LBB3_2
 ; RV32ZBB-NEXT:  # %bb.1:
 ; RV32ZBB-NEXT:    srl a0, a1, a5
 ; RV32ZBB-NEXT:    j .LBB3_3
 ; RV32ZBB-NEXT:  .LBB3_2:
-; RV32ZBB-NEXT:    srl a0, a3, a2
-; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    srl a0, a4, a2
+; RV32ZBB-NEXT:    sub a3, a6, a2
 ; RV32ZBB-NEXT:    slli a7, a1, 1
-; RV32ZBB-NEXT:    sll a6, a7, a6
-; RV32ZBB-NEXT:    or a0, a0, a6
+; RV32ZBB-NEXT:    sll a3, a7, a3
+; RV32ZBB-NEXT:    or a0, a0, a3
 ; RV32ZBB-NEXT:  .LBB3_3:
-; RV32ZBB-NEXT:    li a6, 32
-; RV32ZBB-NEXT:    sub a6, a6, a2
-; RV32ZBB-NEXT:    bltz a6, .LBB3_5
+; RV32ZBB-NEXT:    neg a7, a2
+; RV32ZBB-NEXT:    li a3, 32
+; RV32ZBB-NEXT:    sub t0, a3, a2
+; RV32ZBB-NEXT:    sll a3, a4, a7
+; RV32ZBB-NEXT:    bltz t0, .LBB3_6
 ; RV32ZBB-NEXT:  # %bb.4:
-; RV32ZBB-NEXT:    sll a4, a3, a6
-; RV32ZBB-NEXT:    bltz a5, .LBB3_6
-; RV32ZBB-NEXT:    j .LBB3_7
+; RV32ZBB-NEXT:    bltz a5, .LBB3_7
 ; RV32ZBB-NEXT:  .LBB3_5:
-; RV32ZBB-NEXT:    li a6, 64
-; RV32ZBB-NEXT:    sub a6, a6, a2
-; RV32ZBB-NEXT:    sll a7, a1, a6
-; RV32ZBB-NEXT:    sub a4, a4, a6
-; RV32ZBB-NEXT:    srli t0, a3, 1
-; RV32ZBB-NEXT:    srl a4, t0, a4
+; RV32ZBB-NEXT:    mv a1, a3
+; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:  .LBB3_6:
+; RV32ZBB-NEXT:    sll a7, a1, a7
+; RV32ZBB-NEXT:    li t0, 64
+; RV32ZBB-NEXT:    sub t0, t0, a2
+; RV32ZBB-NEXT:    sub a6, a6, t0
+; RV32ZBB-NEXT:    srli a4, a4, 1
+; RV32ZBB-NEXT:    srl a4, a4, a6
 ; RV32ZBB-NEXT:    or a4, a7, a4
-; RV32ZBB-NEXT:    sll a3, a3, a6
 ; RV32ZBB-NEXT:    or a0, a0, a3
-; RV32ZBB-NEXT:    bgez a5, .LBB3_7
-; RV32ZBB-NEXT:  .LBB3_6:
-; RV32ZBB-NEXT:    srl a1, a1, a2
-; RV32ZBB-NEXT:    or a4, a4, a1
+; RV32ZBB-NEXT:    mv a3, a4
+; RV32ZBB-NEXT:    bgez a5, .LBB3_5
 ; RV32ZBB-NEXT:  .LBB3_7:
-; RV32ZBB-NEXT:    mv a1, a4
+; RV32ZBB-NEXT:    srl a1, a1, a2
+; RV32ZBB-NEXT:    or a3, a3, a1
+; RV32ZBB-NEXT:    mv a1, a3
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: rotr_64:

diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 1353f2cd8c638..7e63aa91262a2 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -151,18 +151,20 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    lw a2, 0(a2)
 ; RV32I-NEXT:    lw a5, 8(a1)
 ; RV32I-NEXT:    lw a4, 12(a1)
+; RV32I-NEXT:    neg a6, a2
 ; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    sub t0, a3, a2
-; RV32I-NEXT:    li a6, 32
-; RV32I-NEXT:    sub t1, a6, a2
 ; RV32I-NEXT:    li t2, 31
+; RV32I-NEXT:    li a7, 32
+; RV32I-NEXT:    sub t1, a7, a2
+; RV32I-NEXT:    sll t0, a5, a6
 ; RV32I-NEXT:    bltz t1, .LBB6_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll t6, a5, t1
+; RV32I-NEXT:    mv t6, t0
 ; RV32I-NEXT:    j .LBB6_3
 ; RV32I-NEXT:  .LBB6_2:
-; RV32I-NEXT:    sll a6, a4, t0
-; RV32I-NEXT:    sub a7, t2, t0
+; RV32I-NEXT:    sll a6, a4, a6
+; RV32I-NEXT:    sub a7, a3, a2
+; RV32I-NEXT:    sub a7, t2, a7
 ; RV32I-NEXT:    srli t3, a5, 1
 ; RV32I-NEXT:    srl a7, t3, a7
 ; RV32I-NEXT:    or t6, a6, a7
@@ -206,7 +208,6 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    or t5, t6, t5
 ; RV32I-NEXT:    bgez t1, .LBB6_15
 ; RV32I-NEXT:  .LBB6_14:
-; RV32I-NEXT:    sll t0, a5, t0
 ; RV32I-NEXT:    or t5, t5, t0
 ; RV32I-NEXT:  .LBB6_15:
 ; RV32I-NEXT:    slli t0, a4, 1
@@ -289,18 +290,20 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    lw a2, 0(a2)
 ; RV32I-NEXT:    lw a5, 8(a1)
 ; RV32I-NEXT:    lw a4, 12(a1)
+; RV32I-NEXT:    neg a6, a2
 ; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    sub t1, a3, a2
-; RV32I-NEXT:    li a6, 32
-; RV32I-NEXT:    sub t2, a6, a2
-; RV32I-NEXT:    li t4, 31
+; RV32I-NEXT:    li t3, 31
+; RV32I-NEXT:    li a7, 32
+; RV32I-NEXT:    sub t2, a7, a2
+; RV32I-NEXT:    sll t1, a5, a6
 ; RV32I-NEXT:    bltz t2, .LBB7_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll s0, a5, t2
+; RV32I-NEXT:    mv s0, t1
 ; RV32I-NEXT:    j .LBB7_3
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a6, a4, t1
-; RV32I-NEXT:    sub a7, t4, t1
+; RV32I-NEXT:    sll a6, a4, a6
+; RV32I-NEXT:    sub a7, a3, a2
+; RV32I-NEXT:    sub a7, t3, a7
 ; RV32I-NEXT:    srli t0, a5, 1
 ; RV32I-NEXT:    srl a7, t0, a7
 ; RV32I-NEXT:    or s0, a6, a7
@@ -312,7 +315,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    srl a7, t6, a2
 ; RV32I-NEXT:    or s0, s0, a7
 ; RV32I-NEXT:  .LBB7_5:
-; RV32I-NEXT:    addi t3, a2, -64
+; RV32I-NEXT:    addi t4, a2, -64
 ; RV32I-NEXT:    addi t5, a2, -96
 ; RV32I-NEXT:    srai a7, a4, 31
 ; RV32I-NEXT:    bltz t5, .LBB7_7
@@ -321,7 +324,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    bgeu a2, a3, .LBB7_8
 ; RV32I-NEXT:    j .LBB7_9
 ; RV32I-NEXT:  .LBB7_7:
-; RV32I-NEXT:    sra t0, a4, t3
+; RV32I-NEXT:    sra t0, a4, t4
 ; RV32I-NEXT:    bltu a2, a3, .LBB7_9
 ; RV32I-NEXT:  .LBB7_8:
 ; RV32I-NEXT:    mv s0, t0
@@ -332,7 +335,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    mv t0, s0
 ; RV32I-NEXT:  .LBB7_11:
 ; RV32I-NEXT:    lw a1, 0(a1)
-; RV32I-NEXT:    sub t4, t4, a2
+; RV32I-NEXT:    sub t3, t3, a2
 ; RV32I-NEXT:    bltz a6, .LBB7_13
 ; RV32I-NEXT:  # %bb.12:
 ; RV32I-NEXT:    srl t6, t6, a6
@@ -341,11 +344,10 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:  .LBB7_13:
 ; RV32I-NEXT:    srl s0, a1, a2
 ; RV32I-NEXT:    slli t6, t6, 1
-; RV32I-NEXT:    sll t6, t6, t4
+; RV32I-NEXT:    sll t6, t6, t3
 ; RV32I-NEXT:    or t6, s0, t6
 ; RV32I-NEXT:    bgez t2, .LBB7_15
 ; RV32I-NEXT:  .LBB7_14:
-; RV32I-NEXT:    sll t1, a5, t1
 ; RV32I-NEXT:    or t6, t6, t1
 ; RV32I-NEXT:  .LBB7_15:
 ; RV32I-NEXT:    slli t1, a4, 1
@@ -358,8 +360,8 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    li t2, 95
 ; RV32I-NEXT:    sub t2, t2, a2
 ; RV32I-NEXT:    sll t2, t1, t2
-; RV32I-NEXT:    srl t3, a5, t3
-; RV32I-NEXT:    or t2, t3, t2
+; RV32I-NEXT:    srl t4, a5, t4
+; RV32I-NEXT:    or t2, t4, t2
 ; RV32I-NEXT:    bltu a2, a3, .LBB7_19
 ; RV32I-NEXT:  .LBB7_18:
 ; RV32I-NEXT:    mv t6, t2
@@ -376,7 +378,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    bgez a6, .LBB7_21
 ; RV32I-NEXT:  .LBB7_23:
 ; RV32I-NEXT:    srl a5, a5, a2
-; RV32I-NEXT:    sll t1, t1, t4
+; RV32I-NEXT:    sll t1, t1, t3
 ; RV32I-NEXT:    or a5, a5, t1
 ; RV32I-NEXT:    bltu a2, a3, .LBB7_25
 ; RV32I-NEXT:  .LBB7_24:
@@ -428,18 +430,20 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    lw a2, 0(a2)
 ; RV32I-NEXT:    lw a5, 4(a1)
 ; RV32I-NEXT:    lw a4, 0(a1)
+; RV32I-NEXT:    neg a6, a2
 ; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    sub t0, a3, a2
-; RV32I-NEXT:    li a6, 32
-; RV32I-NEXT:    sub t1, a6, a2
 ; RV32I-NEXT:    li t2, 31
+; RV32I-NEXT:    li a7, 32
+; RV32I-NEXT:    sub t1, a7, a2
+; RV32I-NEXT:    srl t0, a5, a6
 ; RV32I-NEXT:    bltz t1, .LBB8_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srl t6, a5, t1
+; RV32I-NEXT:    mv t6, t0
 ; RV32I-NEXT:    j .LBB8_3
 ; RV32I-NEXT:  .LBB8_2:
-; RV32I-NEXT:    srl a6, a4, t0
-; RV32I-NEXT:    sub a7, t2, t0
+; RV32I-NEXT:    srl a6, a4, a6
+; RV32I-NEXT:    sub a7, a3, a2
+; RV32I-NEXT:    sub a7, t2, a7
 ; RV32I-NEXT:    slli t3, a5, 1
 ; RV32I-NEXT:    sll a7, t3, a7
 ; RV32I-NEXT:    or t6, a6, a7
@@ -483,7 +487,6 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    or t5, t6, t5
 ; RV32I-NEXT:    bgez t1, .LBB8_15
 ; RV32I-NEXT:  .LBB8_14:
-; RV32I-NEXT:    srl t0, a5, t0
 ; RV32I-NEXT:    or t5, t5, t0
 ; RV32I-NEXT:  .LBB8_15:
 ; RV32I-NEXT:    srli t0, a4, 1

More information about the llvm-commits mailing list