[llvm] dcfc1fd - [SelectionDAG][RISCV][AMDGPU][ARM] Improve SimplifyDemandedBits for SHL with variable shift amount.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 14 16:10:31 PDT 2022
Author: Craig Topper
Date: 2022-07-14T16:10:14-07:00
New Revision: dcfc1fd26f6c2e6ecb1f8f9ae1b77d7b5c30c434
URL: https://github.com/llvm/llvm-project/commit/dcfc1fd26f6c2e6ecb1f8f9ae1b77d7b5c30c434
DIFF: https://github.com/llvm/llvm-project/commit/dcfc1fd26f6c2e6ecb1f8f9ae1b77d7b5c30c434.diff
LOG: [SelectionDAG][RISCV][AMDGPU][ARM] Improve SimplifyDemandedBits for SHL with variable shift amount.
If we have a variable shift amount and the demanded mask has leading
zeros, we can propagate those leading zeros so that the corresponding
high bits are not demanded from operand 0. This can allow a
zero_extend/sign_extend to become an any_extend. This pattern commonly
arises from the C integer promotion rules.

The same transform is already done in IR by InstCombineSimplifyDemanded.cpp,
where, for example, a sign_extend can be turned into a zero_extend.
Reviewed By: spatel, foad
Differential Revision: https://reviews.llvm.org/D121833
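
For illustration, here is a minimal sketch (not part of the patch; the
function name is made up) of the kind of C/C++ source that produces this
pattern through integer promotion:

// 'a' is promoted to 'int' before the shift (a zero_extend in the DAG),
// shifted by a variable amount, and then truncated back to 16 bits by the
// store. Only the low 16 bits of the result are demanded, so the high bits
// of the extended operand are never observed and the extend can be weakened
// to an any_extend, removing the masking instructions (compare the RISC-V
// alu16.ll and shl-demanded.ll diffs below).
void shl_i16(unsigned short *p, unsigned short a, int b) {
  *p = (unsigned short)(a << b);
}

Note that when operand 0 is simplified this way, the nsw/nuw flags are
cleared from the shift, since the no-wrap guarantee may no longer hold for
the simplified operand.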
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/shl.ll
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
llvm/test/CodeGen/RISCV/alu16.ll
llvm/test/CodeGen/RISCV/alu8.ll
llvm/test/CodeGen/RISCV/shl-demanded.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 66389a57f780..f61b5256d46b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1723,6 +1723,26 @@ bool TargetLowering::SimplifyDemandedBits(
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+ } else {
+ // This is a variable shift, so we can't shift the demand mask by a known
+ // amount. But if we are not demanding high bits, then we are not
+ // demanding those bits from the pre-shifted operand either.
+ if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
+ APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
+ if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
+ return true;
+ }
+ Known.resetAll();
+ }
}
// If we are only demanding sign bits then we can use the shift source
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index a2f169a83b3e..8e59750efa78 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -589,7 +589,7 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 0, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @10
-; EG-NEXT: ALU 12, @16, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 11, @16, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -604,13 +604,12 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: ALU clause starting at 15:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 16:
-; EG-NEXT: AND_INT T0.Y, T0.X, literal.x,
-; EG-NEXT: AND_INT T0.Z, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
; EG-NEXT: LSHR T0.W, T0.X, literal.y,
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: LSHL T0.W, PS, PV.W,
-; EG-NEXT: LSHL * T1.W, PV.Z, PV.Y,
+; EG-NEXT: LSHL * T1.W, T7.X, PV.Z,
; EG-NEXT: AND_INT T1.W, PS, literal.x,
; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
@@ -684,7 +683,7 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 53, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -703,10 +702,9 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: MOV T3.X, T10.W,
; EG-NEXT: MOV * T0.Z, T6.X,
; EG-NEXT: MOV * T1.Y, T2.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PS, PV.W,
+; EG-NEXT: LSHL * T1.W, T0.X, PV.W,
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
; EG-NEXT: AND_INT * T2.W, T0.Z, literal.y,
; EG-NEXT: 65535(9.183409e-41), -65536(nan)
@@ -725,10 +723,9 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
; EG-NEXT: MOV T6.X, PV.W,
; EG-NEXT: MOV * T0.X, T7.X,
-; EG-NEXT: AND_INT T1.W, T0.Z, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL T1.W, PS, PV.W,
+; EG-NEXT: LSHL T1.W, T0.Y, PV.W,
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
; EG-NEXT: -65536(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index 11a173a27161..f1d8073afeac 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -25,14 +25,13 @@ define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_and_b32 s6, s4, 0xffff
-; VI-NEXT: s_lshr_b32 s4, s4, 16
+; VI-NEXT: s_lshr_b32 s6, s4, 16
; VI-NEXT: s_lshr_b32 s7, s5, 16
-; VI-NEXT: s_lshl_b32 s4, s4, s7
-; VI-NEXT: s_lshl_b32 s5, s6, s5
-; VI-NEXT: s_lshl_b32 s4, s4, 16
-; VI-NEXT: s_and_b32 s5, s5, 0xffff
-; VI-NEXT: s_or_b32 s4, s5, s4
+; VI-NEXT: s_lshl_b32 s6, s6, s7
+; VI-NEXT: s_lshl_b32 s4, s4, s5
+; VI-NEXT: s_lshl_b32 s5, s6, 16
+; VI-NEXT: s_and_b32 s4, s4, 0xffff
+; VI-NEXT: s_or_b32 s4, s4, s5
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/RISCV/alu16.ll b/llvm/test/CodeGen/RISCV/alu16.ll
index ef9726fe28d6..e6a132ce3ecc 100644
--- a/llvm/test/CodeGen/RISCV/alu16.ll
+++ b/llvm/test/CodeGen/RISCV/alu16.ll
@@ -213,16 +213,12 @@ define i16 @sll(i16 %a, i16 %b) nounwind {
define void @sll_ext(i16 %a, i32 signext %b, i16* %p) nounwind {
; RV32I-LABEL: sll_ext:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
@@ -238,16 +234,12 @@ define void @sll_ext(i16 %a, i32 signext %b, i16* %p) nounwind {
define void @sll_ext_drop_poison(i16 %a, i32 signext %b, i16* %p) nounwind {
; RV32I-LABEL: sll_ext_drop_poison:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext_drop_poison:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll
index 52a843ad7b3f..29fc14b2bfa5 100644
--- a/llvm/test/CodeGen/RISCV/alu8.ll
+++ b/llvm/test/CodeGen/RISCV/alu8.ll
@@ -211,14 +211,12 @@ define i8 @sll(i8 %a, i8 %b) nounwind {
define void @sll_ext(i8 %a, i32 signext %b, i8* %p) nounwind {
; RV32I-LABEL: sll_ext:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 255
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
@@ -234,14 +232,12 @@ define void @sll_ext(i8 %a, i32 signext %b, i8* %p) nounwind {
define void @sll_ext_drop_poison(i8 %a, i32 signext %b, i8* %p) nounwind {
; RV32I-LABEL: sll_ext_drop_poison:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext_drop_poison:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 255
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/shl-demanded.ll b/llvm/test/CodeGen/RISCV/shl-demanded.ll
index a902c39787f1..b1228cb001d5 100644
--- a/llvm/test/CodeGen/RISCV/shl-demanded.ll
+++ b/llvm/test/CodeGen/RISCV/shl-demanded.ll
@@ -7,16 +7,12 @@
define void @sext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
; RV32I-LABEL: sext_shl_trunc_same_size:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_shl_trunc_same_size:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
@@ -30,16 +26,12 @@ define void @sext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
define void @zext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
; RV32I-LABEL: zext_shl_trunc_same_size:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_trunc_same_size:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
@@ -53,16 +45,12 @@ define void @zext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
define void @sext_shl_trunc_smaller(i16 %x, i32 %y, i8* %res) {
; RV32I-LABEL: sext_shl_trunc_smaller:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_shl_trunc_smaller:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
@@ -76,16 +64,12 @@ define void @sext_shl_trunc_smaller(i16 %x, i32 %y, i8* %res) {
define void @zext_shl_trunc_smaller(i16 %x, i32 %y, i8* %res) {
; RV32I-LABEL: zext_shl_trunc_smaller:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_trunc_smaller:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
@@ -151,8 +135,6 @@ define zeroext i17 @zext_shl_trunc_larger(i16 %x, i32 %y) {
define i32 @sext_shl_mask(i16 %x, i32 %y) {
; RV32I-LABEL: sext_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
@@ -160,8 +142,6 @@ define i32 @sext_shl_mask(i16 %x, i32 %y) {
;
; RV64I-LABEL: sext_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
@@ -175,20 +155,16 @@ define i32 @sext_shl_mask(i16 %x, i32 %y) {
define i32 @zext_shl_mask(i16 %x, i32 %y) {
; RV32I-LABEL: zext_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -1
-; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
%conv = zext i16 %x to i32
%shl = shl i32 %conv, %y
@@ -253,22 +229,20 @@ define i32 @zext_shl_mask_higher(i16 %x, i32 %y) {
define i32 @set_shl_mask(i32 %x, i32 %y) {
; RV32I-LABEL: set_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 48
-; RV32I-NEXT: addi a2, a2, 1
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a3, a2, 1
+; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: lui a1, 16
-; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: set_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a2, 48
-; RV64I-NEXT: addiw a2, a2, 1
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a3, a2, 1
+; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: ret
%z = or i32 %x, 196609
%s = shl i32 %z, %y
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index fba800bc3a5f..845436ea301b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -160,80 +160,81 @@ define dso_local i32 @b(i32* %c, i32 %d, i32 %e, i32* %n) "frame-pointer"="all"
; CHECK-NEXT: add r7, sp, #12
; CHECK-NEXT: .save {r8, r9, r10, r11}
; CHECK-NEXT: push.w {r8, r9, r10, r11}
-; CHECK-NEXT: .pad #12
-; CHECK-NEXT: sub sp, #12
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: wls lr, r1, .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: adds r2, r3, #4
-; CHECK-NEXT: add.w r9, r0, #4
-; CHECK-NEXT: mvn r11, #1
-; CHECK-NEXT: @ implicit-def: $r6
-; CHECK-NEXT: @ implicit-def: $r12
-; CHECK-NEXT: str r4, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: add.w r10, r3, #4
+; CHECK-NEXT: adds r0, #4
+; CHECK-NEXT: mvn r9, #1
+; CHECK-NEXT: @ implicit-def: $r8
+; CHECK-NEXT: @ implicit-def: $r4
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: .LBB2_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r1, [r9, #-4]
-; CHECK-NEXT: ldr.w r10, [r2]
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: muls r1, r3, r1
-; CHECK-NEXT: adds.w r8, r1, #-2147483648
-; CHECK-NEXT: asr.w r5, r1, #31
-; CHECK-NEXT: adc r1, r5, #0
-; CHECK-NEXT: mul r5, r10, r0
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: ldr.w r2, [r11, #4]
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: add.w r5, r5, #-2147483648
-; CHECK-NEXT: asrl r8, r1, r5
-; CHECK-NEXT: smull r4, r5, r10, r8
-; CHECK-NEXT: lsll r4, r5, #30
-; CHECK-NEXT: asrs r1, r5, #31
-; CHECK-NEXT: mov r4, r5
-; CHECK-NEXT: lsll r4, r1, r10
-; CHECK-NEXT: lsll r4, r1, #30
-; CHECK-NEXT: ldr.w r4, [r11]
-; CHECK-NEXT: asrs r5, r1, #31
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: muls r4, r6, r4
-; CHECK-NEXT: adds r4, #2
-; CHECK-NEXT: lsll r8, r5, r4
-; CHECK-NEXT: ldr r4, [r9], #4
-; CHECK-NEXT: asr.w r5, r12, #31
-; CHECK-NEXT: add.w r8, r8, #-2147483648
+; CHECK-NEXT: ldr r2, [r0]
+; CHECK-NEXT: asrs r5, r4, #31
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: muls r2, r3, r2
+; CHECK-NEXT: adds r4, r4, r2
+; CHECK-NEXT: adc.w r2, r5, r2, asr #31
+; CHECK-NEXT: ldr.w r5, [r9, #4]
+; CHECK-NEXT: adds.w r4, r4, #-2147483648
+; CHECK-NEXT: adc r1, r2, #0
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: smull r5, r6, r5, r8
+; CHECK-NEXT: ldr.w r2, [r9]
+; CHECK-NEXT: asrs r4, r1, #31
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: subs r5, r1, r5
+; CHECK-NEXT: sbcs r4, r6
+; CHECK-NEXT: adds.w r6, r5, #-2147483648
+; CHECK-NEXT: adc r5, r4, #0
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: muls r4, r3, r4
; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: adds.w r1, r12, r4
-; CHECK-NEXT: adc.w r5, r5, r4, asr #31
-; CHECK-NEXT: smull r6, r4, r2, r6
-; CHECK-NEXT: adds.w r1, r1, #-2147483648
-; CHECK-NEXT: adc r1, r5, #0
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: asrs r5, r1, #31
-; CHECK-NEXT: subs r6, r1, r6
-; CHECK-NEXT: sbcs r5, r4
-; CHECK-NEXT: adds.w r6, r6, #-2147483648
-; CHECK-NEXT: adc r5, r5, #0
-; CHECK-NEXT: asrl r6, r5, r8
+; CHECK-NEXT: adds.w r0, r4, #-2147483648
+; CHECK-NEXT: asr.w r1, r4, #31
+; CHECK-NEXT: ldr.w r4, [r10]
+; CHECK-NEXT: adc r1, r1, #0
+; CHECK-NEXT: mul r2, r4, r12
+; CHECK-NEXT: add.w r12, r12, #4
+; CHECK-NEXT: add.w r2, r2, #-2147483648
+; CHECK-NEXT: asrl r0, r1, r2
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: smull r0, r1, r4, r0
+; CHECK-NEXT: lsll r0, r1, #30
+; CHECK-NEXT: asr.w r11, r1, #31
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: lsll r0, r11, r4
+; CHECK-NEXT: lsrl r0, r11, #2
+; CHECK-NEXT: mul r1, r1, r8
+; CHECK-NEXT: adds r1, #2
+; CHECK-NEXT: lsll r0, r11, r1
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: add.w r0, r0, #-2147483648
+; CHECK-NEXT: asrl r6, r5, r0
+; CHECK-NEXT: movs r0, #2
; CHECK-NEXT: lsrl r6, r5, #2
-; CHECK-NEXT: movs r5, #2
-; CHECK-NEXT: str r6, [r5]
-; CHECK-NEXT: ldr r5, [r11], #-4
-; CHECK-NEXT: mls r1, r5, r10, r1
-; CHECK-NEXT: adds.w r12, r1, #-2147483648
-; CHECK-NEXT: asr.w r4, r1, #31
-; CHECK-NEXT: adc r1, r4, #0
-; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT: lsrl r12, r1, #2
-; CHECK-NEXT: rsb.w r1, r12, #0
+; CHECK-NEXT: str r6, [r0]
+; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: ldr r0, [r9], #-4
+; CHECK-NEXT: mls r0, r0, r4, r1
+; CHECK-NEXT: adds.w r4, r0, #-2147483648
+; CHECK-NEXT: asr.w r1, r0, #31
+; CHECK-NEXT: adc r1, r1, #0
+; CHECK-NEXT: lsrl r4, r1, #2
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: str r0, [r2]
+; CHECK-NEXT: str r0, [r10, #-4]
+; CHECK-NEXT: add.w r10, r10, #4
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: adds r0, #4
-; CHECK-NEXT: str r1, [r4]
-; CHECK-NEXT: str r1, [r2, #-4]
-; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: .LBB2_3: @ %while.end
-; CHECK-NEXT: add sp, #12
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: pop.w {r8, r9, r10, r11}
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry: