[llvm] [DAG] Remove OneUse restriction when folding (shl (add x, c1), c2) (PR #69105)
via llvm-commits
llvm-commits@lists.llvm.org
Sun Oct 15 04:30:46 PDT 2023
LiqinWeng (https://github.com/LiqinWeng) created https://github.com/llvm/llvm-project/pull/69105
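This change removes the one-use restriction from the generic DAG combine that reassociates a shift of an add with constant operands: (shl (add x, c1), c2) becomes (add (shl x, c2), c1 << c2), which lets the shift fold into addressing modes even when the add has other users. Below is a minimal sketch of the fold, hand-written here for illustration and not part of the patch; the combine runs on SelectionDAG nodes, but LLVM IR is used for readability:

; Before: the add feeds the shift (and, with this patch, may also
; have other users without blocking the fold).
define i64 @before(i64 %x) {
  %a = add i64 %x, 5   ; c1 = 5
  %s = shl i64 %a, 3   ; c2 = 3
  ret i64 %s
}

; After: the add constant is pre-shifted, since 5 << 3 == 40.
define i64 @after(i64 %x) {
  %s = shl i64 %x, 3
  %a = add i64 %s, 40
  ret i64 %a
}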
From 5251cd294279ee8c056a1e1f4c7f14f6b41f06b4 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng@spacemit.com>
Date: Sun, 15 Oct 2023 18:45:49 +0800
Subject: [PATCH] [DAG] Remove OneUse restriction when folding (shl (add x, c1), c2)
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 5 +
.../Target/Hexagon/HexagonISelLowering.cpp | 10 +
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 3 +
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +
llvm/lib/Target/X86/X86ISelLowering.h | 3 +
llvm/test/CodeGen/ARM/add-like-or.ll | 21 +-
llvm/test/CodeGen/Hexagon/isel-store-rr-i1.ll | 1 -
.../CodeGen/RISCV/riscv-shifted-extend.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll | 14 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 200 +++++++++---------
llvm/test/CodeGen/Thumb2/pr52817.ll | 30 +--
15 files changed, 189 insertions(+), 137 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 73438113651f55d..4981c37f8f2cb3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10035,7 +10035,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Shl1 =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
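The hasOneUse check does not disappear; it moves into the targets. In the hunks below, AArch64 and AMDGPU extend their existing isDesirableToCommuteWithShift overrides to bail out when the shift's LHS has more than one use, while Hexagon, PowerPC, and X86 gain new overrides doing the same, so those targets keep their previous behavior.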
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64d00dafd835b11..e8f7e4469dc283e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15787,6 +15787,9 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (!ShiftLHS->hasOneUse())
+ return false;
+
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
// combine it with shift 'N' to let it be lowered to UBFX except:
// ((x >> C) & mask) << C.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 607d59db7bcf709..b24994812888897 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -959,6 +959,11 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
+
+ if (!N->getOperand(0).hasOneUse()) {
+ return false;
+ }
+
// Always commute pre-type legalization and right shifts.
// We're looking for shl(or(x,y),z) patterns.
if (Level < CombineLevel::AfterLegalizeTypes ||
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index e950b44341c9225..785aa78e2941bab 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2140,6 +2140,16 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'tstbit'
}
+bool HexagonTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ if (!N->getOperand(0)->hasOneUse())
+ return false;
+ return true;
+}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8c7d0b70f385782..1353185e6d4e43f 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -154,6 +154,9 @@ class HexagonTargetLowering : public TargetLowering {
bool hasBitTest(SDValue X, SDValue Y) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5e0c2d62f5a9cb5..9d31af419d75cdf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16976,6 +16976,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ if (!N->getOperand(0).hasOneUse()) {
+ return false;
+ }
+ return true;
+}
+
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d8679dcf4018083..30611c99b8f92ac 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1053,6 +1053,9 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 13684babb2385ea..5c4698fe7189302 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3304,6 +3304,17 @@ X86TargetLowering::preferredShiftLegalizationStrategy(
ExpansionFactor);
}
+bool X86TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ if (!N->getOperand(0)->hasOneUse())
+ return false;
+ return true;
+}
+
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8046f42736951cd..16fa8ae8a6467a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1167,6 +1167,9 @@ namespace llvm {
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
unsigned ExpansionFactor) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool shouldSplatInsEltVarIndex(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
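The remaining hunks update the affected tests. The RISC-V riscv-shifted-extend.ll diff shows the intended effect: the separate slli/add recomputation of the address is gone, and the first store reuses the common base with an immediate offset (sd a2, 40(a0)) instead of sd a2, 0(a4).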
diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index 5de03a92afeb42f..f723713e77d0822 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -249,27 +249,28 @@ entry:
define i32 @multiuse(i32 %i, ptr %x, ptr %y) {
; CHECK-T1-LABEL: multiuse:
; CHECK-T1: @ %bb.0: @ %entry
+; CHECK-T1-NEXT: lsls r2, r0, #3
+; CHECK-T1-NEXT: adds r1, r1, r2
+; CHECK-T1-NEXT: ldr r1, [r1, #4]
; CHECK-T1-NEXT: lsls r0, r0, #1
+; CHECK-T1-NEXT: adds r0, r1, r0
; CHECK-T1-NEXT: adds r0, r0, #1
-; CHECK-T1-NEXT: lsls r2, r0, #2
-; CHECK-T1-NEXT: ldr r1, [r1, r2]
-; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: multiuse:
; CHECK-T2: @ %bb.0: @ %entry
-; CHECK-T2-NEXT: lsls r0, r0, #1
+; CHECK-T2-NEXT: add.w r1, r1, r0, lsl #3
+; CHECK-T2-NEXT: ldr r1, [r1, #4]
+; CHECK-T2-NEXT: add.w r0, r1, r0, lsl #1
; CHECK-T2-NEXT: adds r0, #1
-; CHECK-T2-NEXT: ldr.w r1, [r1, r0, lsl #2]
-; CHECK-T2-NEXT: add r0, r1
; CHECK-T2-NEXT: bx lr
;
; CHECK-A-LABEL: multiuse:
; CHECK-A: @ %bb.0: @ %entry
-; CHECK-A-NEXT: mov r2, #1
-; CHECK-A-NEXT: orr r0, r2, r0, lsl #1
-; CHECK-A-NEXT: ldr r1, [r1, r0, lsl #2]
-; CHECK-A-NEXT: add r0, r0, r1
+; CHECK-A-NEXT: add r1, r1, r0, lsl #3
+; CHECK-A-NEXT: ldr r1, [r1, #4]
+; CHECK-A-NEXT: add r0, r1, r0, lsl #1
+; CHECK-A-NEXT: add r0, r0, #1
; CHECK-A-NEXT: bx lr
entry:
%mul = shl i32 %i, 1
diff --git a/llvm/test/CodeGen/Hexagon/isel-store-rr-i1.ll b/llvm/test/CodeGen/Hexagon/isel-store-rr-i1.ll
index c4a23ab45861613..c298d928438243a 100644
--- a/llvm/test/CodeGen/Hexagon/isel-store-rr-i1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-store-rr-i1.ll
@@ -3,7 +3,6 @@
target triple = "hexagon-unknown-linux-gnu"
define i32 @f0(float %a0, double %a1, i1 %a2, i16 %a3, i8 %a4) {
-; CHECK-LABEL: f0:
; CHECK: memb(r1+r0<<#2) = r2
b0:
%v0 = alloca double, align 8
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669dea..28cf3cb597478f2 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -70,11 +70,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
; RV64-LABEL: test2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: slli a4, a3, 3
-; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a2, 40(a0)
; RV64-NEXT: sd a2, 48(a0)
; RV64-NEXT: sd a3, 280(a0)
; RV64-NEXT: ret
@@ -100,11 +98,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB3_2: # %entry
-; RV64-NEXT: slli a2, a4, 3
-; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a5, 40(a0)
; RV64-NEXT: sd a5, 48(a0)
; RV64-NEXT: sd a4, 280(a0)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 0fda7909df3134f..430722cedc7f028 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -7,14 +7,14 @@
define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: srem_eq_fold_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: li a0, -85
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a1, -85
-; CHECK-NEXT: vmacc.vx v9, a1, v8
-; CHECK-NEXT: vsll.vi v8, v9, 7
-; CHECK-NEXT: vsrl.vi v9, v9, 1
-; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v9, v8, 7
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vmsleu.vx v0, v8, a0
; CHECK-NEXT: ret
%head_six = insertelement <vscale x 4 x i8> poison, i8 6, i32 0
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 6ed352b51f25459..8cfb36b899b0562 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -310,64 +310,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 12(a0)
-; RV32-NEXT: lw a1, 8(s0)
-; RV32-NEXT: slli a2, a0, 30
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: srli s1, a1, 2
-; RV32-NEXT: or s1, s1, a2
-; RV32-NEXT: slli a2, a1, 31
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or s2, a4, a2
-; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: lw a0, 8(a0)
+; RV32-NEXT: lw a1, 4(s0)
+; RV32-NEXT: lbu a2, 12(s0)
+; RV32-NEXT: slli a3, a0, 31
+; RV32-NEXT: srli s1, a1, 1
+; RV32-NEXT: or s1, s1, a3
+; RV32-NEXT: slli a3, a2, 30
+; RV32-NEXT: srli a4, a0, 2
+; RV32-NEXT: or s2, a4, a3
+; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai s3, a0, 31
-; RV32-NEXT: srli a1, a1, 1
-; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: slli a2, a2, 31
; RV32-NEXT: lw a0, 0(s0)
-; RV32-NEXT: srai s4, a1, 31
-; RV32-NEXT: slli a1, a3, 31
+; RV32-NEXT: srai s4, a2, 31
+; RV32-NEXT: slli a1, a1, 31
; RV32-NEXT: srai a1, a1, 31
; RV32-NEXT: li a2, 6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: li a2, 7
+; RV32-NEXT: li a2, -5
+; RV32-NEXT: li a3, -1
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s4
-; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32-NEXT: mv s2, a0
; RV32-NEXT: mv s4, a1
-; RV32-NEXT: li a2, -5
-; RV32-NEXT: li a3, -1
+; RV32-NEXT: li a2, 7
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s3
+; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32-NEXT: or a2, s5, s6
; RV32-NEXT: snez a2, a2
-; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: xori a1, s2, 1
+; RV32-NEXT: xori a1, s2, 2
; RV32-NEXT: or a1, a1, s4
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: neg a3, a2
+; RV32-NEXT: slli a4, a1, 2
+; RV32-NEXT: addi a5, a0, -1
+; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: andi a3, a0, 7
-; RV32-NEXT: sb a3, 12(s0)
-; RV32-NEXT: slli a3, a1, 1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: srli a2, a1, 31
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: or a0, a2, a0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: andi a1, a1, 7
+; RV32-NEXT: sb a1, 12(s0)
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: addi a0, a0, -2
+; RV32-NEXT: sw a0, 4(s0)
+; RV32-NEXT: srli a0, a5, 31
+; RV32-NEXT: andi a5, a5, 1
+; RV32-NEXT: slli a5, a5, 1
+; RV32-NEXT: or a0, a4, a0
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: addi a0, a0, -4
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -389,23 +391,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lbu a0, 12(a0)
-; RV64-NEXT: lwu a1, 8(s0)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: ld a2, 0(s0)
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ld a1, 0(a0)
+; RV64-NEXT: lwu a0, 8(a0)
+; RV64-NEXT: srli a2, a1, 2
+; RV64-NEXT: lbu a3, 12(s0)
+; RV64-NEXT: slli a4, a0, 62
+; RV64-NEXT: or a2, a4, a2
+; RV64-NEXT: srai s1, a2, 31
+; RV64-NEXT: slli a3, a3, 32
+; RV64-NEXT: or a0, a0, a3
; RV64-NEXT: slli a0, a0, 29
-; RV64-NEXT: srai s1, a0, 31
-; RV64-NEXT: srli a0, a2, 2
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: srai a0, a0, 31
-; RV64-NEXT: slli a2, a2, 31
-; RV64-NEXT: srai s2, a2, 31
-; RV64-NEXT: li a1, 7
+; RV64-NEXT: slli a1, a1, 31
+; RV64-NEXT: srai s2, a1, 31
+; RV64-NEXT: li a1, -5
; RV64-NEXT: call __moddi3@plt
; RV64-NEXT: mv s3, a0
-; RV64-NEXT: li a1, -5
+; RV64-NEXT: li a1, 7
; RV64-NEXT: mv a0, s1
; RV64-NEXT: call __moddi3@plt
; RV64-NEXT: mv s1, a0
@@ -422,25 +424,26 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: sltu a0, a1, a0
-; RV64-NEXT: addi s1, s1, -2
+; RV64-NEXT: addi s1, s1, -1
; RV64-NEXT: seqz a1, s1
-; RV64-NEXT: addi s3, s3, -1
+; RV64-NEXT: addi s3, s3, -2
; RV64-NEXT: seqz a2, s3
; RV64-NEXT: neg a0, a0
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a3, a2, 2
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: slli a3, a1, 2
-; RV64-NEXT: slli a4, a2, 31
-; RV64-NEXT: srli a4, a4, 62
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: sw a3, 8(s0)
-; RV64-NEXT: slli a1, a1, 29
-; RV64-NEXT: srli a1, a1, 61
-; RV64-NEXT: sb a1, 12(s0)
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a2, a2, 29
+; RV64-NEXT: srli a2, a2, 61
+; RV64-NEXT: sb a2, 12(s0)
+; RV64-NEXT: slli a2, a1, 31
+; RV64-NEXT: srli a2, a2, 62
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: addi a2, a2, -4
+; RV64-NEXT: sw a2, 8(s0)
; RV64-NEXT: slli a0, a0, 31
; RV64-NEXT: srli a0, a0, 31
-; RV64-NEXT: slli a2, a2, 33
-; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: sd a0, 0(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -462,64 +465,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lbu a0, 12(a0)
-; RV32M-NEXT: lw a1, 8(s0)
-; RV32M-NEXT: slli a2, a0, 30
-; RV32M-NEXT: lw a3, 4(s0)
-; RV32M-NEXT: srli s1, a1, 2
-; RV32M-NEXT: or s1, s1, a2
-; RV32M-NEXT: slli a2, a1, 31
-; RV32M-NEXT: srli a4, a3, 1
-; RV32M-NEXT: or s2, a4, a2
-; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: lw a0, 8(a0)
+; RV32M-NEXT: lw a1, 4(s0)
+; RV32M-NEXT: lbu a2, 12(s0)
+; RV32M-NEXT: slli a3, a0, 31
+; RV32M-NEXT: srli s1, a1, 1
+; RV32M-NEXT: or s1, s1, a3
+; RV32M-NEXT: slli a3, a2, 30
+; RV32M-NEXT: srli a4, a0, 2
+; RV32M-NEXT: or s2, a4, a3
+; RV32M-NEXT: srli a0, a0, 1
; RV32M-NEXT: slli a0, a0, 31
; RV32M-NEXT: srai s3, a0, 31
-; RV32M-NEXT: srli a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 31
+; RV32M-NEXT: srli a2, a2, 2
+; RV32M-NEXT: slli a2, a2, 31
; RV32M-NEXT: lw a0, 0(s0)
-; RV32M-NEXT: srai s4, a1, 31
-; RV32M-NEXT: slli a1, a3, 31
+; RV32M-NEXT: srai s4, a2, 31
+; RV32M-NEXT: slli a1, a1, 31
; RV32M-NEXT: srai a1, a1, 31
; RV32M-NEXT: li a2, 6
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV32M-NEXT: mv s5, a0
; RV32M-NEXT: mv s6, a1
-; RV32M-NEXT: li a2, 7
+; RV32M-NEXT: li a2, -5
+; RV32M-NEXT: li a3, -1
; RV32M-NEXT: mv a0, s2
; RV32M-NEXT: mv a1, s4
-; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV32M-NEXT: mv s2, a0
; RV32M-NEXT: mv s4, a1
-; RV32M-NEXT: li a2, -5
-; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: li a2, 7
; RV32M-NEXT: mv a0, s1
; RV32M-NEXT: mv a1, s3
+; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV32M-NEXT: or a2, s5, s6
; RV32M-NEXT: snez a2, a2
-; RV32M-NEXT: xori a0, a0, 2
+; RV32M-NEXT: xori a0, a0, 1
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: seqz a0, a0
-; RV32M-NEXT: xori a1, s2, 1
+; RV32M-NEXT: xori a1, s2, 2
; RV32M-NEXT: or a1, a1, s4
; RV32M-NEXT: seqz a1, a1
; RV32M-NEXT: neg a3, a2
+; RV32M-NEXT: slli a4, a1, 2
+; RV32M-NEXT: addi a5, a0, -1
+; RV32M-NEXT: slli a0, a0, 1
; RV32M-NEXT: addi a1, a1, -1
-; RV32M-NEXT: addi a0, a0, -1
; RV32M-NEXT: sw a3, 0(s0)
-; RV32M-NEXT: andi a3, a0, 7
-; RV32M-NEXT: sb a3, 12(s0)
-; RV32M-NEXT: slli a3, a1, 1
-; RV32M-NEXT: or a2, a3, a2
-; RV32M-NEXT: sw a2, 4(s0)
-; RV32M-NEXT: srli a2, a1, 31
-; RV32M-NEXT: andi a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 1
-; RV32M-NEXT: slli a0, a0, 2
-; RV32M-NEXT: or a0, a2, a0
-; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: andi a1, a1, 7
+; RV32M-NEXT: sb a1, 12(s0)
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: addi a0, a0, -2
+; RV32M-NEXT: sw a0, 4(s0)
+; RV32M-NEXT: srli a0, a5, 31
+; RV32M-NEXT: andi a5, a5, 1
+; RV32M-NEXT: slli a5, a5, 1
+; RV32M-NEXT: or a0, a4, a0
+; RV32M-NEXT: or a0, a0, a5
+; RV32M-NEXT: addi a0, a0, -4
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -581,22 +586,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M-NEXT: srli a1, a1, 1
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: sltu a1, a5, a1
+; RV64M-NEXT: slli a4, a2, 2
; RV64M-NEXT: addi a2, a2, -1
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: neg a1, a1
-; RV64M-NEXT: slli a4, a3, 33
+; RV64M-NEXT: slli a5, a3, 33
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srli a1, a1, 31
-; RV64M-NEXT: or a1, a1, a4
+; RV64M-NEXT: or a1, a1, a5
; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
-; RV64M-NEXT: slli a3, a3, 31
-; RV64M-NEXT: srli a3, a3, 62
-; RV64M-NEXT: or a1, a3, a1
-; RV64M-NEXT: sw a1, 8(a0)
; RV64M-NEXT: slli a2, a2, 29
; RV64M-NEXT: srli a2, a2, 61
; RV64M-NEXT: sb a2, 12(a0)
+; RV64M-NEXT: slli a3, a3, 31
+; RV64M-NEXT: srli a3, a3, 62
+; RV64M-NEXT: or a3, a4, a3
+; RV64M-NEXT: addi a3, a3, -4
+; RV64M-NEXT: sw a3, 8(a0)
; RV64M-NEXT: ret
;
; RV32MV-LABEL: test_srem_vec:
diff --git a/llvm/test/CodeGen/Thumb2/pr52817.ll b/llvm/test/CodeGen/Thumb2/pr52817.ll
index 87615f0a1f7ef45..abd0f5e5d186a11 100644
--- a/llvm/test/CodeGen/Thumb2/pr52817.ll
+++ b/llvm/test/CodeGen/Thumb2/pr52817.ll
@@ -17,31 +17,31 @@ define i32 @test(ptr %arg, ptr %arg1, ptr %arg2) #0 !dbg !6 {
; CHECK-NEXT: @ %bb.0: @ %bb
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: add r7, sp, #12
-; CHECK-NEXT: str r8, [sp, #-4]!
; CHECK-NEXT: mov.w lr, #0
; CHECK-NEXT: mov.w r9, #1
-; CHECK-NEXT: movw r12, #4100
+; CHECK-NEXT: movw r12, #4104
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: LBB0_1: @ %bb3
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: adds r5, r3, #1
+; CHECK-NEXT: lsl.w r6, r9, r3
+; CHECK-NEXT: add.w r4, r0, r3, lsl #2
+; CHECK-NEXT: ands r6, r3
+; CHECK-NEXT: adds r3, #1
; CHECK-NEXT: str.w lr, [r2]
+; CHECK-NEXT: add r4, r12
+; CHECK-NEXT: add.w r3, r1, r3, lsl #2
; CHECK-NEXT: cmp.w lr, #0
-; CHECK-NEXT: add.w r4, r0, r5, lsl #2
-; CHECK-NEXT: add.w r8, r4, r12
-; CHECK-NEXT: lsl.w r4, r9, r3
-; CHECK-NEXT: and.w r3, r3, r4
-; CHECK-NEXT: add.w r4, r1, r5, lsl #2
+; CHECK-NEXT: @DEBUG_VALUE: test:this <- [DW_OP_LLVM_arg 0, DW_OP_plus_uconst 135168, DW_OP_LLVM_arg 0, DW_OP_constu 4, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst 4, DW_OP_stack_value] undef
; CHECK-NEXT: itte ne
-; CHECK-NEXT: movne r6, #0
+; CHECK-NEXT: movne r5, #0
; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT: @DEBUG_VALUE: test:this <- [DW_OP_LLVM_arg 0, DW_OP_plus_uconst 135168, DW_OP_LLVM_arg 1, DW_OP_constu 4, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst 4, DW_OP_stack_value] $r0, $r5
; CHECK-NEXT: .loc 1 28 24 prologue_end @ test.cpp:28:24
-; CHECK-NEXT: strne.w r6, [r8]
-; CHECK-NEXT: moveq r6, #1
-; CHECK-NEXT: ldr r4, [r4, #4]
-; CHECK-NEXT: orrs r4, r6
-; CHECK-NEXT: str.w r4, [r8]
+; CHECK-NEXT: strne r5, [r4]
+; CHECK-NEXT: moveq r5, #1
+; CHECK-NEXT: ldr r3, [r3, #4]
+; CHECK-NEXT: orrs r3, r5
+; CHECK-NEXT: str r3, [r4]
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: b LBB0_1
; CHECK-NEXT: Ltmp1:
; CHECK-NEXT: Lfunc_end0: