[llvm] c45c1b1 - [RISCV] Teach RISCVDAGToDAGISel::selectShiftMask to replace sub from constant with neg.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 9 12:36:58 PST 2022
Author: Craig Topper
Date: 2022-02-09T12:33:01-08:00
New Revision: c45c1b130b5cdb16426f5f7758a8518ea449ff69
URL: https://github.com/llvm/llvm-project/commit/c45c1b130b5cdb16426f5f7758a8518ea449ff69
DIFF: https://github.com/llvm/llvm-project/commit/c45c1b130b5cdb16426f5f7758a8518ea449ff69.diff
LOG: [RISCV] Teach RISCVDAGToDAGISel::selectShiftMask to replace sub from constant with neg.
If the shift amount is (sub C, X) where C is 0 modulo the bit width of
the shift, we can replace it with a neg or negw of X.
A similar transform is already done for AArch64 and X86.
Reviewed By: khchen
Differential Revision: https://reviews.llvm.org/D119089
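For example, a 32-bit rotate computes its second shift amount as
(sub 32, %y). RISC-V shift instructions only read the low
log2(32) = 5 bits of the amount, and (32 - y) == (0 - y) mod 32, so the
li+sub pair collapses to a single neg. A minimal sketch of the pattern
(hypothetical value names, mirroring the rotl_32 test updated below):

  %z = sub i32 32, %y      ; 32 == 0 (mod 32)
  %b = shl i32 %x, %y
  %c = lshr i32 %x, %z     ; hardware masks the amount to 5 bits
  %d = or i32 %b, %c       ; rotl(x, y)

On RV64 the transform emits negw (subw) even for 64-bit shifts: only the
low log2(ShiftWidth) <= 6 bits of the amount are consumed, and those bits
are the same for sub and subw.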
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/rotl-rotr.ll
llvm/test/CodeGen/RISCV/shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 520f593341190..3497dd24fd5c7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1737,6 +1737,22 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
ShAmt = N.getOperand(0);
return true;
}
+ } else if (N.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(N.getOperand(0))) {
+ uint64_t Imm = N.getConstantOperandVal(0);
+ // If we are shifting by C-X where C == 0 mod ShiftWidth, then just shift
+ // by -X to generate a NEG instead of a SUB of a constant.
+ if (Imm != 0 && Imm % ShiftWidth == 0) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ SDValue Zero =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
+ unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+ MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+ N.getOperand(1));
+ ShAmt = SDValue(Neg, 0);
+ return true;
+ }
}
ShAmt = N;
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index 75943edf4187e..aace4ac34216d 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -17,8 +17,7 @@
define i32 @rotl_32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: rotl_32:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a2, 32
-; RV32I-NEXT: sub a2, a2, a1
+; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: sll a1, a0, a1
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: or a0, a1, a0
@@ -26,8 +25,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 32
-; RV64I-NEXT: subw a2, a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -52,8 +50,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
define i32 @rotr_32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: rotr_32:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a2, 32
-; RV32I-NEXT: sub a2, a2, a1
+; RV32I-NEXT: neg a2, a1
; RV32I-NEXT: srl a1, a0, a1
; RV32I-NEXT: sll a0, a0, a2
; RV32I-NEXT: or a0, a1, a0
@@ -61,8 +58,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 32
-; RV64I-NEXT: subw a2, a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -89,47 +85,48 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: mv a3, a1
; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: li a4, 31
+; RV32I-NEXT: li a6, 31
; RV32I-NEXT: bltz a5, .LBB2_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a5
; RV32I-NEXT: j .LBB2_3
; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: sll a1, a3, a2
-; RV32I-NEXT: sub a6, a4, a2
+; RV32I-NEXT: sub a4, a6, a2
; RV32I-NEXT: srli a7, a0, 1
-; RV32I-NEXT: srl a6, a7, a6
-; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: srl a4, a7, a4
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub a6, a6, a2
-; RV32I-NEXT: bltz a6, .LBB2_5
+; RV32I-NEXT: neg a7, a2
+; RV32I-NEXT: li a4, 32
+; RV32I-NEXT: sub t0, a4, a2
+; RV32I-NEXT: srl a4, a3, a7
+; RV32I-NEXT: bltz t0, .LBB2_6
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a4, a3, a6
-; RV32I-NEXT: bltz a5, .LBB2_6
-; RV32I-NEXT: j .LBB2_7
+; RV32I-NEXT: bltz a5, .LBB2_7
; RV32I-NEXT: .LBB2_5:
-; RV32I-NEXT: li a6, 64
-; RV32I-NEXT: sub a6, a6, a2
-; RV32I-NEXT: srl a7, a0, a6
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: sll a4, t0, a4
-; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: srl a3, a3, a6
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: bgez a5, .LBB2_7
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_6:
+; RV32I-NEXT: srl a7, a0, a7
+; RV32I-NEXT: li t0, 64
+; RV32I-NEXT: sub t0, t0, a2
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: sll a3, a3, a6
+; RV32I-NEXT: or a3, a7, a3
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: bgez a5, .LBB2_5
+; RV32I-NEXT: .LBB2_7:
; RV32I-NEXT: sll a0, a0, a2
; RV32I-NEXT: or a4, a4, a0
-; RV32I-NEXT: .LBB2_7:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 64
-; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -139,40 +136,42 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: mv a3, a1
; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: li a4, 31
+; RV32ZBB-NEXT: li a6, 31
; RV32ZBB-NEXT: bltz a5, .LBB2_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sll a1, a0, a5
; RV32ZBB-NEXT: j .LBB2_3
; RV32ZBB-NEXT: .LBB2_2:
; RV32ZBB-NEXT: sll a1, a3, a2
-; RV32ZBB-NEXT: sub a6, a4, a2
+; RV32ZBB-NEXT: sub a4, a6, a2
; RV32ZBB-NEXT: srli a7, a0, 1
-; RV32ZBB-NEXT: srl a6, a7, a6
-; RV32ZBB-NEXT: or a1, a1, a6
+; RV32ZBB-NEXT: srl a4, a7, a4
+; RV32ZBB-NEXT: or a1, a1, a4
; RV32ZBB-NEXT: .LBB2_3:
-; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a6, a6, a2
-; RV32ZBB-NEXT: bltz a6, .LBB2_5
+; RV32ZBB-NEXT: neg a7, a2
+; RV32ZBB-NEXT: li a4, 32
+; RV32ZBB-NEXT: sub t0, a4, a2
+; RV32ZBB-NEXT: srl a4, a3, a7
+; RV32ZBB-NEXT: bltz t0, .LBB2_6
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a4, a3, a6
-; RV32ZBB-NEXT: bltz a5, .LBB2_6
-; RV32ZBB-NEXT: j .LBB2_7
+; RV32ZBB-NEXT: bltz a5, .LBB2_7
; RV32ZBB-NEXT: .LBB2_5:
-; RV32ZBB-NEXT: li a6, 64
-; RV32ZBB-NEXT: sub a6, a6, a2
-; RV32ZBB-NEXT: srl a7, a0, a6
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: slli t0, a3, 1
-; RV32ZBB-NEXT: sll a4, t0, a4
-; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: srl a3, a3, a6
-; RV32ZBB-NEXT: or a1, a1, a3
-; RV32ZBB-NEXT: bgez a5, .LBB2_7
+; RV32ZBB-NEXT: mv a0, a4
+; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB2_6:
+; RV32ZBB-NEXT: srl a7, a0, a7
+; RV32ZBB-NEXT: li t0, 64
+; RV32ZBB-NEXT: sub t0, t0, a2
+; RV32ZBB-NEXT: sub a6, a6, t0
+; RV32ZBB-NEXT: slli a3, a3, 1
+; RV32ZBB-NEXT: sll a3, a3, a6
+; RV32ZBB-NEXT: or a3, a7, a3
+; RV32ZBB-NEXT: or a1, a1, a4
+; RV32ZBB-NEXT: mv a4, a3
+; RV32ZBB-NEXT: bgez a5, .LBB2_5
+; RV32ZBB-NEXT: .LBB2_7:
; RV32ZBB-NEXT: sll a0, a0, a2
; RV32ZBB-NEXT: or a4, a4, a0
-; RV32ZBB-NEXT: .LBB2_7:
; RV32ZBB-NEXT: mv a0, a4
; RV32ZBB-NEXT: ret
;
@@ -190,49 +189,50 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: rotr_64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: li a4, 31
+; RV32I-NEXT: li a6, 31
; RV32I-NEXT: bltz a5, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a5
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srl a0, a3, a2
-; RV32I-NEXT: sub a6, a4, a2
+; RV32I-NEXT: srl a0, a4, a2
+; RV32I-NEXT: sub a3, a6, a2
; RV32I-NEXT: slli a7, a1, 1
-; RV32I-NEXT: sll a6, a7, a6
-; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: sll a3, a7, a3
+; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub a6, a6, a2
-; RV32I-NEXT: bltz a6, .LBB3_5
+; RV32I-NEXT: neg a7, a2
+; RV32I-NEXT: li a3, 32
+; RV32I-NEXT: sub t0, a3, a2
+; RV32I-NEXT: sll a3, a4, a7
+; RV32I-NEXT: bltz t0, .LBB3_6
; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a4, a3, a6
-; RV32I-NEXT: bltz a5, .LBB3_6
-; RV32I-NEXT: j .LBB3_7
+; RV32I-NEXT: bltz a5, .LBB3_7
; RV32I-NEXT: .LBB3_5:
-; RV32I-NEXT: li a6, 64
-; RV32I-NEXT: sub a6, a6, a2
-; RV32I-NEXT: sll a7, a1, a6
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: srli t0, a3, 1
-; RV32I-NEXT: srl a4, t0, a4
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB3_6:
+; RV32I-NEXT: sll a7, a1, a7
+; RV32I-NEXT: li t0, 64
+; RV32I-NEXT: sub t0, t0, a2
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: srl a4, a4, a6
; RV32I-NEXT: or a4, a7, a4
-; RV32I-NEXT: sll a3, a3, a6
; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: bgez a5, .LBB3_7
-; RV32I-NEXT: .LBB3_6:
-; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a4, a4, a1
+; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: bgez a5, .LBB3_5
; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: or a3, a3, a1
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a2, 64
-; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: negw a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -240,43 +240,45 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
;
; RV32ZBB-LABEL: rotr_64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: addi a5, a2, -32
-; RV32ZBB-NEXT: li a4, 31
+; RV32ZBB-NEXT: li a6, 31
; RV32ZBB-NEXT: bltz a5, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a0, a1, a5
; RV32ZBB-NEXT: j .LBB3_3
; RV32ZBB-NEXT: .LBB3_2:
-; RV32ZBB-NEXT: srl a0, a3, a2
-; RV32ZBB-NEXT: sub a6, a4, a2
+; RV32ZBB-NEXT: srl a0, a4, a2
+; RV32ZBB-NEXT: sub a3, a6, a2
; RV32ZBB-NEXT: slli a7, a1, 1
-; RV32ZBB-NEXT: sll a6, a7, a6
-; RV32ZBB-NEXT: or a0, a0, a6
+; RV32ZBB-NEXT: sll a3, a7, a3
+; RV32ZBB-NEXT: or a0, a0, a3
; RV32ZBB-NEXT: .LBB3_3:
-; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a6, a6, a2
-; RV32ZBB-NEXT: bltz a6, .LBB3_5
+; RV32ZBB-NEXT: neg a7, a2
+; RV32ZBB-NEXT: li a3, 32
+; RV32ZBB-NEXT: sub t0, a3, a2
+; RV32ZBB-NEXT: sll a3, a4, a7
+; RV32ZBB-NEXT: bltz t0, .LBB3_6
; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a4, a3, a6
-; RV32ZBB-NEXT: bltz a5, .LBB3_6
-; RV32ZBB-NEXT: j .LBB3_7
+; RV32ZBB-NEXT: bltz a5, .LBB3_7
; RV32ZBB-NEXT: .LBB3_5:
-; RV32ZBB-NEXT: li a6, 64
-; RV32ZBB-NEXT: sub a6, a6, a2
-; RV32ZBB-NEXT: sll a7, a1, a6
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: srli t0, a3, 1
-; RV32ZBB-NEXT: srl a4, t0, a4
+; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: ret
+; RV32ZBB-NEXT: .LBB3_6:
+; RV32ZBB-NEXT: sll a7, a1, a7
+; RV32ZBB-NEXT: li t0, 64
+; RV32ZBB-NEXT: sub t0, t0, a2
+; RV32ZBB-NEXT: sub a6, a6, t0
+; RV32ZBB-NEXT: srli a4, a4, 1
+; RV32ZBB-NEXT: srl a4, a4, a6
; RV32ZBB-NEXT: or a4, a7, a4
-; RV32ZBB-NEXT: sll a3, a3, a6
; RV32ZBB-NEXT: or a0, a0, a3
-; RV32ZBB-NEXT: bgez a5, .LBB3_7
-; RV32ZBB-NEXT: .LBB3_6:
-; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: or a4, a4, a1
+; RV32ZBB-NEXT: mv a3, a4
+; RV32ZBB-NEXT: bgez a5, .LBB3_5
; RV32ZBB-NEXT: .LBB3_7:
-; RV32ZBB-NEXT: mv a1, a4
+; RV32ZBB-NEXT: srl a1, a1, a2
+; RV32ZBB-NEXT: or a3, a3, a1
+; RV32ZBB-NEXT: mv a1, a3
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_64:
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 1353f2cd8c638..7e63aa91262a2 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -151,18 +151,20 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a2, 0(a2)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: neg a6, a2
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: sub t0, a3, a2
-; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub t1, a6, a2
; RV32I-NEXT: li t2, 31
+; RV32I-NEXT: li a7, 32
+; RV32I-NEXT: sub t1, a7, a2
+; RV32I-NEXT: sll t0, a5, a6
; RV32I-NEXT: bltz t1, .LBB6_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll t6, a5, t1
+; RV32I-NEXT: mv t6, t0
; RV32I-NEXT: j .LBB6_3
; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: sll a6, a4, t0
-; RV32I-NEXT: sub a7, t2, t0
+; RV32I-NEXT: sll a6, a4, a6
+; RV32I-NEXT: sub a7, a3, a2
+; RV32I-NEXT: sub a7, t2, a7
; RV32I-NEXT: srli t3, a5, 1
; RV32I-NEXT: srl a7, t3, a7
; RV32I-NEXT: or t6, a6, a7
@@ -206,7 +208,6 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: bgez t1, .LBB6_15
; RV32I-NEXT: .LBB6_14:
-; RV32I-NEXT: sll t0, a5, t0
; RV32I-NEXT: or t5, t5, t0
; RV32I-NEXT: .LBB6_15:
; RV32I-NEXT: slli t0, a4, 1
@@ -289,18 +290,20 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a2, 0(a2)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: neg a6, a2
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: sub t1, a3, a2
-; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub t2, a6, a2
-; RV32I-NEXT: li t4, 31
+; RV32I-NEXT: li t3, 31
+; RV32I-NEXT: li a7, 32
+; RV32I-NEXT: sub t2, a7, a2
+; RV32I-NEXT: sll t1, a5, a6
; RV32I-NEXT: bltz t2, .LBB7_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll s0, a5, t2
+; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: sll a6, a4, t1
-; RV32I-NEXT: sub a7, t4, t1
+; RV32I-NEXT: sll a6, a4, a6
+; RV32I-NEXT: sub a7, a3, a2
+; RV32I-NEXT: sub a7, t3, a7
; RV32I-NEXT: srli t0, a5, 1
; RV32I-NEXT: srl a7, t0, a7
; RV32I-NEXT: or s0, a6, a7
@@ -312,7 +315,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: srl a7, t6, a2
; RV32I-NEXT: or s0, s0, a7
; RV32I-NEXT: .LBB7_5:
-; RV32I-NEXT: addi t3, a2, -64
+; RV32I-NEXT: addi t4, a2, -64
; RV32I-NEXT: addi t5, a2, -96
; RV32I-NEXT: srai a7, a4, 31
; RV32I-NEXT: bltz t5, .LBB7_7
@@ -321,7 +324,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bgeu a2, a3, .LBB7_8
; RV32I-NEXT: j .LBB7_9
; RV32I-NEXT: .LBB7_7:
-; RV32I-NEXT: sra t0, a4, t3
+; RV32I-NEXT: sra t0, a4, t4
; RV32I-NEXT: bltu a2, a3, .LBB7_9
; RV32I-NEXT: .LBB7_8:
; RV32I-NEXT: mv s0, t0
@@ -332,7 +335,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: mv t0, s0
; RV32I-NEXT: .LBB7_11:
; RV32I-NEXT: lw a1, 0(a1)
-; RV32I-NEXT: sub t4, t4, a2
+; RV32I-NEXT: sub t3, t3, a2
; RV32I-NEXT: bltz a6, .LBB7_13
; RV32I-NEXT: # %bb.12:
; RV32I-NEXT: srl t6, t6, a6
@@ -341,11 +344,10 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: .LBB7_13:
; RV32I-NEXT: srl s0, a1, a2
; RV32I-NEXT: slli t6, t6, 1
-; RV32I-NEXT: sll t6, t6, t4
+; RV32I-NEXT: sll t6, t6, t3
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: bgez t2, .LBB7_15
; RV32I-NEXT: .LBB7_14:
-; RV32I-NEXT: sll t1, a5, t1
; RV32I-NEXT: or t6, t6, t1
; RV32I-NEXT: .LBB7_15:
; RV32I-NEXT: slli t1, a4, 1
@@ -358,8 +360,8 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: li t2, 95
; RV32I-NEXT: sub t2, t2, a2
; RV32I-NEXT: sll t2, t1, t2
-; RV32I-NEXT: srl t3, a5, t3
-; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: srl t4, a5, t4
+; RV32I-NEXT: or t2, t4, t2
; RV32I-NEXT: bltu a2, a3, .LBB7_19
; RV32I-NEXT: .LBB7_18:
; RV32I-NEXT: mv t6, t2
@@ -376,7 +378,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bgez a6, .LBB7_21
; RV32I-NEXT: .LBB7_23:
; RV32I-NEXT: srl a5, a5, a2
-; RV32I-NEXT: sll t1, t1, t4
+; RV32I-NEXT: sll t1, t1, t3
; RV32I-NEXT: or a5, a5, t1
; RV32I-NEXT: bltu a2, a3, .LBB7_25
; RV32I-NEXT: .LBB7_24:
@@ -428,18 +430,20 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a2, 0(a2)
; RV32I-NEXT: lw a5, 4(a1)
; RV32I-NEXT: lw a4, 0(a1)
+; RV32I-NEXT: neg a6, a2
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: sub t0, a3, a2
-; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub t1, a6, a2
; RV32I-NEXT: li t2, 31
+; RV32I-NEXT: li a7, 32
+; RV32I-NEXT: sub t1, a7, a2
+; RV32I-NEXT: srl t0, a5, a6
; RV32I-NEXT: bltz t1, .LBB8_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl t6, a5, t1
+; RV32I-NEXT: mv t6, t0
; RV32I-NEXT: j .LBB8_3
; RV32I-NEXT: .LBB8_2:
-; RV32I-NEXT: srl a6, a4, t0
-; RV32I-NEXT: sub a7, t2, t0
+; RV32I-NEXT: srl a6, a4, a6
+; RV32I-NEXT: sub a7, a3, a2
+; RV32I-NEXT: sub a7, t2, a7
; RV32I-NEXT: slli t3, a5, 1
; RV32I-NEXT: sll a7, t3, a7
; RV32I-NEXT: or t6, a6, a7
@@ -483,7 +487,6 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: bgez t1, .LBB8_15
; RV32I-NEXT: .LBB8_14:
-; RV32I-NEXT: srl t0, a5, t0
; RV32I-NEXT: or t5, t5, t0
; RV32I-NEXT: .LBB8_15:
; RV32I-NEXT: srli t0, a4, 1