[llvm] [DAG] Remove OneUse restriction when folding (shl (add x, c1), c2) (PR #101294)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 23:57:00 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: LiqinWeng (LiqinWeng)
<details>
<summary>Changes</summary>
---
Patch is 22.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101294.diff
14 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+5)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.cpp (+10)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.h (+3)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+12)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+3)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+11)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+3)
- (modified) llvm/test/CodeGen/ARM/add-like-or.ll (+11-10)
- (modified) llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll (+2-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+103-97)
- (modified) llvm/test/CodeGen/Thumb2/pr52817.ll (+15-15)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b35d08b327ef3..e6d0bd2495f7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Shl1 =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1e9da9b819bdd..9bcf6a2f67056 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17518,6 +17518,9 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // if (!ShiftLHS->hasOneUse())
+ // return false;
+
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
// combine it with shift 'N' to let it be lowered to UBFX except:
// ((x >> C) & mask) << C.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2ad91de566323..7eeb4b71b5d43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1036,6 +1036,11 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+
// Always commute pre-type legalization and right shifts.
// We're looking for shl(or(x,y),z) patterns.
if (Level < CombineLevel::AfterLegalizeTypes ||
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 7aeaebc584c64..1a8e123246a07 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2156,6 +2156,16 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'tstbit'
}
+bool HexagonTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3fd961f5a7462..a6bd57630031c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -155,6 +155,9 @@ class HexagonTargetLowering : public TargetLowering {
bool hasBitTest(SDValue X, SDValue Y) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1686ec572c855..5de33627886a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17207,6 +17207,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+ return true;
+}
+
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0bdfdcd15441f..2d42353adafa3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1064,6 +1064,9 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b971afda4229a..fc6d90543ef86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3490,6 +3490,17 @@ X86TargetLowering::preferredShiftLegalizationStrategy(
ExpansionFactor);
}
+bool X86TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
+
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 362daa98e1f8e..4dccb9903df5d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1181,6 +1181,9 @@ namespace llvm {
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
unsigned ExpansionFactor) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool shouldSplatInsEltVarIndex(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index 5de03a92afeb4..f723713e77d08 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -249,27 +249,28 @@ entry:
define i32 @multiuse(i32 %i, ptr %x, ptr %y) {
; CHECK-T1-LABEL: multiuse:
; CHECK-T1: @ %bb.0: @ %entry
+; CHECK-T1-NEXT: lsls r2, r0, #3
+; CHECK-T1-NEXT: adds r1, r1, r2
+; CHECK-T1-NEXT: ldr r1, [r1, #4]
; CHECK-T1-NEXT: lsls r0, r0, #1
+; CHECK-T1-NEXT: adds r0, r1, r0
; CHECK-T1-NEXT: adds r0, r0, #1
-; CHECK-T1-NEXT: lsls r2, r0, #2
-; CHECK-T1-NEXT: ldr r1, [r1, r2]
-; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: multiuse:
; CHECK-T2: @ %bb.0: @ %entry
-; CHECK-T2-NEXT: lsls r0, r0, #1
+; CHECK-T2-NEXT: add.w r1, r1, r0, lsl #3
+; CHECK-T2-NEXT: ldr r1, [r1, #4]
+; CHECK-T2-NEXT: add.w r0, r1, r0, lsl #1
; CHECK-T2-NEXT: adds r0, #1
-; CHECK-T2-NEXT: ldr.w r1, [r1, r0, lsl #2]
-; CHECK-T2-NEXT: add r0, r1
; CHECK-T2-NEXT: bx lr
;
; CHECK-A-LABEL: multiuse:
; CHECK-A: @ %bb.0: @ %entry
-; CHECK-A-NEXT: mov r2, #1
-; CHECK-A-NEXT: orr r0, r2, r0, lsl #1
-; CHECK-A-NEXT: ldr r1, [r1, r0, lsl #2]
-; CHECK-A-NEXT: add r0, r0, r1
+; CHECK-A-NEXT: add r1, r1, r0, lsl #3
+; CHECK-A-NEXT: ldr r1, [r1, #4]
+; CHECK-A-NEXT: add r0, r1, r0, lsl #1
+; CHECK-A-NEXT: add r0, r0, #1
; CHECK-A-NEXT: bx lr
entry:
%mul = shl i32 %i, 1
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669d..28cf3cb597478 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -70,11 +70,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
; RV64-LABEL: test2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: slli a4, a3, 3
-; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a2, 40(a0)
; RV64-NEXT: sd a2, 48(a0)
; RV64-NEXT: sd a3, 280(a0)
; RV64-NEXT: ret
@@ -100,11 +98,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB3_2: # %entry
-; RV64-NEXT: slli a2, a4, 3
-; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a5, 40(a0)
; RV64-NEXT: sd a5, 48(a0)
; RV64-NEXT: sd a4, 280(a0)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 253cfb040308b..d313f188568d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -7,14 +7,14 @@
define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: srem_eq_fold_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: li a0, -85
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a1, -85
-; CHECK-NEXT: vmacc.vx v9, a1, v8
-; CHECK-NEXT: vsll.vi v8, v9, 7
-; CHECK-NEXT: vsrl.vi v9, v9, 1
-; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v9, v8, 7
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vmsleu.vx v0, v8, a0
; CHECK-NEXT: ret
%rem = srem <vscale x 4 x i8> %va, splat (i8 6)
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a8..55de5f011a620 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -314,64 +314,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 12(a0)
-; RV32-NEXT: lw a1, 8(s0)
-; RV32-NEXT: slli a2, a0, 30
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: srli s1, a1, 2
-; RV32-NEXT: or s1, s1, a2
-; RV32-NEXT: slli a2, a1, 31
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or s2, a4, a2
-; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: lw a0, 8(a0)
+; RV32-NEXT: lw a1, 4(s0)
+; RV32-NEXT: lbu a2, 12(s0)
+; RV32-NEXT: slli a3, a0, 31
+; RV32-NEXT: srli s1, a1, 1
+; RV32-NEXT: or s1, s1, a3
+; RV32-NEXT: slli a3, a2, 30
+; RV32-NEXT: srli a4, a0, 2
+; RV32-NEXT: or s2, a4, a3
+; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai s3, a0, 31
-; RV32-NEXT: srli a1, a1, 1
-; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: slli a2, a2, 31
; RV32-NEXT: lw a0, 0(s0)
-; RV32-NEXT: srai s4, a1, 31
-; RV32-NEXT: slli a1, a3, 31
+; RV32-NEXT: srai s4, a2, 31
+; RV32-NEXT: slli a1, a1, 31
; RV32-NEXT: srai a1, a1, 31
; RV32-NEXT: li a2, 6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: li a2, 7
+; RV32-NEXT: li a2, -5
+; RV32-NEXT: li a3, -1
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s4
-; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s2, a0
; RV32-NEXT: mv s4, a1
-; RV32-NEXT: li a2, -5
-; RV32-NEXT: li a3, -1
+; RV32-NEXT: li a2, 7
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s3
+; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: or a2, s5, s6
; RV32-NEXT: snez a2, a2
-; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: xori a1, s2, 1
+; RV32-NEXT: xori a1, s2, 2
; RV32-NEXT: or a1, a1, s4
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: neg a3, a2
+; RV32-NEXT: slli a4, a1, 2
+; RV32-NEXT: addi a5, a0, -1
+; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: andi a3, a0, 7
-; RV32-NEXT: sb a3, 12(s0)
-; RV32-NEXT: slli a3, a1, 1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: srli a2, a1, 31
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: or a0, a2, a0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: andi a1, a1, 7
+; RV32-NEXT: sb a1, 12(s0)
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: addi a0, a0, -2
+; RV32-NEXT: sw a0, 4(s0)
+; RV32-NEXT: srli a0, a5, 31
+; RV32-NEXT: andi a5, a5, 1
+; RV32-NEXT: slli a5, a5, 1
+; RV32-NEXT: or a0, a4, a0
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: addi a0, a0, -4
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -393,23 +395,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lbu a0, 12(a0)
-; RV64-NEXT: lwu a1, 8(s0)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: ld a2, 0(s0)
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ld a1, 0(a0)
+; RV64-NEXT: lwu a0, 8(a0)
+; RV64-NEXT: srli a2, a1, 2
+; RV64-NEXT: lbu a3, 12(s0)
+; RV64-NEXT: slli a4, a0, 62
+; RV64-NEXT: or a2, a4, a2
+; RV64-NEXT: srai s1, a2, 31
+; RV64-NEXT: slli a3, a3, 32
+; RV64-NEXT: or a0, a0, a3
; RV64-NEXT: slli a0, a0, 29
-; RV64-NEXT: srai s1, a0, 31
-; RV64-NEXT: srli a0, a2, 2
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: srai a0, a0, 31
-; RV64-NEXT: slli a2, a2, 31
-; RV64-NEXT: srai s2, a2, 31
-; RV64-NEXT: li a1, 7
+; RV64-NEXT: slli a1, a1, 31
+; RV64-NEXT: srai s2, a1, 31
+; RV64-NEXT: li a1, -5
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s3, a0
-; RV64-NEXT: li a1, -5
+; RV64-NEXT: li a1, 7
; RV64-NEXT: mv a0, s1
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s1, a0
@@ -426,25 +428,26 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: sltu a0, a1, a0
-; RV64-NEXT: addi s1, s1, -2
+; RV64-NEXT: addi s1, s1, -1
; RV64-NEXT: seqz a1, s1
-; RV64-NEXT: addi s3, s3, -1
+; RV64-NEXT: addi s3, s3, -2
; RV64-NEXT: seqz a2, s3
; RV64-NEXT: neg a0, a0
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a3, a2, 2
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: slli a3, a1, 2
-; RV64-NEXT: slli a4, a2, 31
-; RV64-NEXT: srli a4, a4, 62
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: sw a3, 8(s0)
-; RV64-NEXT: slli a1, a1, 29
-; RV64-NEXT: srli a1, a1, 61
-; RV64-NEXT: sb a1, 12(s0)
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a2, a2, 29
+; RV64-NEXT: srli a2, a2, 61
+; RV64-NEXT: sb a2, 12(s0)
+; RV64-NEXT: slli a2, a1, 31
+; RV64-NEXT: srli a2, a2, 62
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: addi a2, a2, -4
+; RV64-NEXT: sw a2, 8(s0)
; RV64-NEXT: slli a0, a0, 31
; RV64-NEXT: srli a0, a0, 31
-; RV64-NEXT: slli a2, a2, 33
-; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: sd a0, 0(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -466,64 +469,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lbu a0, 12(a0)
-; RV32M-NEXT: lw a1, 8(s0)
-; RV32M-NEXT: slli a2, a0, 30
-; RV32M-NEXT: lw a3, 4(s0)
-; RV32M-NEXT: srli s1, a1, 2
-; RV32M-NEXT: or s1, s1, a2
-; RV32M-NEXT: slli a2, a1, 31
-; RV32M-NEXT: srli a4, a3, 1
-; RV32M-NEXT: or s2, a4, a2
-; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: lw a0, 8(a0)
+; RV32M-NEXT: lw a1, 4(s0)
+; RV32M-NEXT: lbu a2, 12(s0)
+; RV32M-NEXT: slli a3, a0, 31
+; RV32M-NEXT: srli s1, a1, 1
+; RV32M-NEXT: or s1, s1, a3
+; RV32M-NEXT: slli a3, a2, 30
+; RV32M-NEXT: srli a4, a0, 2
+; RV32M-NEXT: or s2, a4, a3
+; RV32M-NEXT: srli a0, a0, 1
; RV32M-NEXT: slli a0, a0, 31
; RV32M-NEXT: srai s3, a0, 31
-; RV32M-NEXT: srli a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 31
+; RV32M-NEXT: srli a2, a2, 2
+; RV32M-NEXT: slli a2, a2, 31
; RV32M-NEXT: lw a0, 0(s0)
-; RV32M-NEXT: srai s4, a1, 31
-; RV32M-NEXT: slli a1, a3, 31
+; RV32M-NEXT: srai s4, a2, 31
+; RV32M-NEXT: slli a1, a1, 31
; RV32M-NEXT: srai a1, a1, 31
; RV32M-NEXT: li a2, 6
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s5, a0
; RV32M-NEXT: mv s6, a1
-; RV32M-NEXT: li a2, 7
+; RV32M-NEXT: li a2, -5
+; RV32M-NEXT: li a3, -1
; RV32M-NEXT: mv a0, s2
; RV32M-NEXT: mv a1, s4
-; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s2, a0
; RV32M-NEXT: mv s4, a1
-; RV32M-NEXT: li a2, -5
-; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: li a2, 7
; RV32M-NEXT: mv a0, s1
; RV32M-NEXT: mv a1, s3
+; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: or a2, s5, s6
; RV32M-NEXT: snez a2, a2
-; RV32M-NEXT: xori a0, a0, 2
+; RV32M-NEXT: xori a0, a0, 1
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: seqz a0, a0
-; RV32M-NEXT: xori a1, s2, 1
+; RV32M-NEXT: xori a1, s2, 2
; RV32M-NEXT: or a1, a1, s4
; RV32M-NEXT: seqz a1, a1
; RV32M-NEXT: neg a3, a2
+; RV32M-NEXT: slli a4, a1, 2
+; RV32M-NEXT: addi a5, a0, -1
+; RV32M-NEXT: slli a0, a0, 1
; RV32M-NEXT: addi a1, a1, -1
-; RV32M-NEXT: addi a0, a0, -1
; RV32M-NEXT: sw a3, 0(s0)
-; RV32M-NEXT: andi a3, a0, 7
-; RV32M-NEXT: sb a3, 12(s0)
-; RV32M-NEXT: slli a3, a1, 1
-; RV32M-NEXT: or a2, a3, a2
-; RV32M-NEXT: sw a2, 4(s0)
-; RV32M-NEXT: srli a2, a1, 31
-; RV32M-NEXT: andi a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 1
-; RV32M-NEXT: slli a0, a0, 2
-; RV32M-NEXT: or a0, a2, a0
-; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: andi a1, a1, 7
+; RV32M-NEXT: sb a1, 12(s0)
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: addi a0, a0, -2
+; RV32M-NEXT: sw a0, 4(s0)
+; RV32M-NEXT: srli a0, a5, 31
+; RV32M-NEXT: andi a5, a5, 1
+; RV32M-NEXT: slli a5, a5, 1
+; RV32M-NEXT: or a0, a4, a0
+; RV32M-NEXT: or a0, a0, a5
+; RV32M-NEXT: addi a0, a0, -4
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -585,22 +590,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M-NEXT: srli a1, a1, 1
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: sltu a1, a5, a1
+; RV64M-NEXT: slli a4, a2, 2
; RV64M-NEXT: addi a2, a2, -1
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: neg a1, a1
-; RV64M-NEXT: slli a4, a3, 33
+; RV64M-NEXT: slli a5, a3, 33
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srli a1, a1, 31
-; RV64M-NEXT: or a1, a1, a4
+; RV64M-NEXT: or a1, a1, a5
; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
-; RV64M-NEXT: slli a3, a3, 31
-; RV64M-NEXT: srl...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101294
More information about the llvm-commits mailing list