[llvm] Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default" (PR #98328)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 07:30:00 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-llvm-transforms
Author: Alex Bradbury (asb)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#89927 while we investigate performance regressions reported by @dtcxzyw.
---
The patch is 67.95 KiB and is truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/98328.diff
9 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll (+70-67)
- (modified) llvm/test/CodeGen/RISCV/rvv/pr95865.ll (+98-53)
- (modified) llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll (+267-251)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+20-20)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll (+3-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll (+15-14)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll (+2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll (+35-15)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7b239b8fc17a3..9c37a4f6ec2d0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,8 +398,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return true;
}
- bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
-
std::optional<unsigned> getMinPageSize() const { return 4096; }
};
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index 92639be0017e8..2b4b8e979f3d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -86,29 +86,30 @@ declare i64 @llvm.vscale.i64()
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; NO-SINK-LABEL: sink_splat_add_scalable:
; NO-SINK: # %bb.0: # %entry
-; NO-SINK-NEXT: csrr a2, vlenb
-; NO-SINK-NEXT: srli a2, a2, 1
+; NO-SINK-NEXT: csrr a5, vlenb
+; NO-SINK-NEXT: srli a2, a5, 1
; NO-SINK-NEXT: li a3, 1024
; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
; NO-SINK-NEXT: # %bb.1:
; NO-SINK-NEXT: li a3, 0
; NO-SINK-NEXT: j .LBB1_5
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
-; NO-SINK-NEXT: li a5, 0
; NO-SINK-NEXT: addi a3, a2, -1
; NO-SINK-NEXT: andi a4, a3, 1024
; NO-SINK-NEXT: xori a3, a4, 1024
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; NO-SINK-NEXT: vmv.v.x v8, a1
+; NO-SINK-NEXT: slli a5, a5, 1
+; NO-SINK-NEXT: mv a6, a0
+; NO-SINK-NEXT: mv a7, a3
; NO-SINK-NEXT: .LBB1_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; NO-SINK-NEXT: slli a6, a5, 2
-; NO-SINK-NEXT: add a6, a0, a6
; NO-SINK-NEXT: vl2re32.v v10, (a6)
; NO-SINK-NEXT: vadd.vv v10, v10, v8
-; NO-SINK-NEXT: add a5, a5, a2
; NO-SINK-NEXT: vs2r.v v10, (a6)
-; NO-SINK-NEXT: bne a5, a3, .LBB1_3
+; NO-SINK-NEXT: sub a7, a7, a2
+; NO-SINK-NEXT: add a6, a6, a5
+; NO-SINK-NEXT: bnez a7, .LBB1_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
; NO-SINK-NEXT: beqz a4, .LBB1_7
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -128,28 +129,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; SINK-LABEL: sink_splat_add_scalable:
; SINK: # %bb.0: # %entry
-; SINK-NEXT: csrr a2, vlenb
-; SINK-NEXT: srli a2, a2, 1
+; SINK-NEXT: csrr a5, vlenb
+; SINK-NEXT: srli a2, a5, 1
; SINK-NEXT: li a3, 1024
; SINK-NEXT: bgeu a3, a2, .LBB1_2
; SINK-NEXT: # %bb.1:
; SINK-NEXT: li a3, 0
; SINK-NEXT: j .LBB1_5
; SINK-NEXT: .LBB1_2: # %vector.ph
-; SINK-NEXT: li a5, 0
; SINK-NEXT: addi a3, a2, -1
; SINK-NEXT: andi a4, a3, 1024
; SINK-NEXT: xori a3, a4, 1024
-; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; SINK-NEXT: slli a5, a5, 1
+; SINK-NEXT: mv a6, a0
+; SINK-NEXT: mv a7, a3
+; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; SINK-NEXT: .LBB1_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; SINK-NEXT: slli a6, a5, 2
-; SINK-NEXT: add a6, a0, a6
; SINK-NEXT: vl2re32.v v8, (a6)
; SINK-NEXT: vadd.vx v8, v8, a1
-; SINK-NEXT: add a5, a5, a2
; SINK-NEXT: vs2r.v v8, (a6)
-; SINK-NEXT: bne a5, a3, .LBB1_3
+; SINK-NEXT: sub a7, a7, a2
+; SINK-NEXT: add a6, a6, a5
+; SINK-NEXT: bnez a7, .LBB1_3
; SINK-NEXT: # %bb.4: # %middle.block
; SINK-NEXT: beqz a4, .LBB1_7
; SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -169,28 +171,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; DEFAULT-LABEL: sink_splat_add_scalable:
; DEFAULT: # %bb.0: # %entry
-; DEFAULT-NEXT: csrr a2, vlenb
-; DEFAULT-NEXT: srli a2, a2, 1
+; DEFAULT-NEXT: csrr a5, vlenb
+; DEFAULT-NEXT: srli a2, a5, 1
; DEFAULT-NEXT: li a3, 1024
; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
; DEFAULT-NEXT: # %bb.1:
; DEFAULT-NEXT: li a3, 0
; DEFAULT-NEXT: j .LBB1_5
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
-; DEFAULT-NEXT: li a5, 0
; DEFAULT-NEXT: addi a3, a2, -1
; DEFAULT-NEXT: andi a4, a3, 1024
; DEFAULT-NEXT: xori a3, a4, 1024
-; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; DEFAULT-NEXT: slli a5, a5, 1
+; DEFAULT-NEXT: mv a6, a0
+; DEFAULT-NEXT: mv a7, a3
+; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; DEFAULT-NEXT: .LBB1_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
-; DEFAULT-NEXT: slli a6, a5, 2
-; DEFAULT-NEXT: add a6, a0, a6
; DEFAULT-NEXT: vl2re32.v v8, (a6)
; DEFAULT-NEXT: vadd.vx v8, v8, a1
-; DEFAULT-NEXT: add a5, a5, a2
; DEFAULT-NEXT: vs2r.v v8, (a6)
-; DEFAULT-NEXT: bne a5, a3, .LBB1_3
+; DEFAULT-NEXT: sub a7, a7, a2
+; DEFAULT-NEXT: add a6, a6, a5
+; DEFAULT-NEXT: bnez a7, .LBB1_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
; DEFAULT-NEXT: beqz a4, .LBB1_7
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
@@ -404,32 +407,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; NO-SINK-LABEL: sink_splat_fadd_scalable:
; NO-SINK: # %bb.0: # %entry
; NO-SINK-NEXT: csrr a1, vlenb
-; NO-SINK-NEXT: srli a1, a1, 2
-; NO-SINK-NEXT: li a2, 1024
-; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2
+; NO-SINK-NEXT: srli a2, a1, 2
+; NO-SINK-NEXT: li a3, 1024
+; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
; NO-SINK-NEXT: # %bb.1:
-; NO-SINK-NEXT: li a2, 0
+; NO-SINK-NEXT: li a3, 0
; NO-SINK-NEXT: j .LBB4_5
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
-; NO-SINK-NEXT: li a4, 0
-; NO-SINK-NEXT: addi a2, a1, -1
-; NO-SINK-NEXT: andi a3, a2, 1024
-; NO-SINK-NEXT: xori a2, a3, 1024
+; NO-SINK-NEXT: addi a3, a2, -1
+; NO-SINK-NEXT: andi a4, a3, 1024
+; NO-SINK-NEXT: xori a3, a4, 1024
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; NO-SINK-NEXT: vfmv.v.f v8, fa0
+; NO-SINK-NEXT: mv a5, a0
+; NO-SINK-NEXT: mv a6, a3
; NO-SINK-NEXT: .LBB4_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; NO-SINK-NEXT: slli a5, a4, 2
-; NO-SINK-NEXT: add a5, a0, a5
; NO-SINK-NEXT: vl1re32.v v9, (a5)
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
-; NO-SINK-NEXT: add a4, a4, a1
; NO-SINK-NEXT: vs1r.v v9, (a5)
-; NO-SINK-NEXT: bne a4, a2, .LBB4_3
+; NO-SINK-NEXT: sub a6, a6, a2
+; NO-SINK-NEXT: add a5, a5, a1
+; NO-SINK-NEXT: bnez a6, .LBB4_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
-; NO-SINK-NEXT: beqz a3, .LBB4_7
+; NO-SINK-NEXT: beqz a4, .LBB4_7
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT: slli a1, a2, 2
+; NO-SINK-NEXT: slli a1, a3, 2
; NO-SINK-NEXT: add a1, a0, a1
; NO-SINK-NEXT: lui a2, 1
; NO-SINK-NEXT: add a0, a0, a2
@@ -446,31 +449,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; SINK-LABEL: sink_splat_fadd_scalable:
; SINK: # %bb.0: # %entry
; SINK-NEXT: csrr a1, vlenb
-; SINK-NEXT: srli a1, a1, 2
-; SINK-NEXT: li a2, 1024
-; SINK-NEXT: bgeu a2, a1, .LBB4_2
+; SINK-NEXT: srli a2, a1, 2
+; SINK-NEXT: li a3, 1024
+; SINK-NEXT: bgeu a3, a2, .LBB4_2
; SINK-NEXT: # %bb.1:
-; SINK-NEXT: li a2, 0
+; SINK-NEXT: li a3, 0
; SINK-NEXT: j .LBB4_5
; SINK-NEXT: .LBB4_2: # %vector.ph
-; SINK-NEXT: li a4, 0
-; SINK-NEXT: addi a2, a1, -1
-; SINK-NEXT: andi a3, a2, 1024
-; SINK-NEXT: xori a2, a3, 1024
-; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
+; SINK-NEXT: addi a3, a2, -1
+; SINK-NEXT: andi a4, a3, 1024
+; SINK-NEXT: xori a3, a4, 1024
+; SINK-NEXT: mv a5, a0
+; SINK-NEXT: mv a6, a3
+; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; SINK-NEXT: .LBB4_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; SINK-NEXT: slli a5, a4, 2
-; SINK-NEXT: add a5, a0, a5
; SINK-NEXT: vl1re32.v v8, (a5)
; SINK-NEXT: vfadd.vf v8, v8, fa0
-; SINK-NEXT: add a4, a4, a1
; SINK-NEXT: vs1r.v v8, (a5)
-; SINK-NEXT: bne a4, a2, .LBB4_3
+; SINK-NEXT: sub a6, a6, a2
+; SINK-NEXT: add a5, a5, a1
+; SINK-NEXT: bnez a6, .LBB4_3
; SINK-NEXT: # %bb.4: # %middle.block
-; SINK-NEXT: beqz a3, .LBB4_7
+; SINK-NEXT: beqz a4, .LBB4_7
; SINK-NEXT: .LBB4_5: # %for.body.preheader
-; SINK-NEXT: slli a1, a2, 2
+; SINK-NEXT: slli a1, a3, 2
; SINK-NEXT: add a1, a0, a1
; SINK-NEXT: lui a2, 1
; SINK-NEXT: add a0, a0, a2
@@ -487,31 +490,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; DEFAULT-LABEL: sink_splat_fadd_scalable:
; DEFAULT: # %bb.0: # %entry
; DEFAULT-NEXT: csrr a1, vlenb
-; DEFAULT-NEXT: srli a1, a1, 2
-; DEFAULT-NEXT: li a2, 1024
-; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2
+; DEFAULT-NEXT: srli a2, a1, 2
+; DEFAULT-NEXT: li a3, 1024
+; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
; DEFAULT-NEXT: # %bb.1:
-; DEFAULT-NEXT: li a2, 0
+; DEFAULT-NEXT: li a3, 0
; DEFAULT-NEXT: j .LBB4_5
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
-; DEFAULT-NEXT: li a4, 0
-; DEFAULT-NEXT: addi a2, a1, -1
-; DEFAULT-NEXT: andi a3, a2, 1024
-; DEFAULT-NEXT: xori a2, a3, 1024
-; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
+; DEFAULT-NEXT: addi a3, a2, -1
+; DEFAULT-NEXT: andi a4, a3, 1024
+; DEFAULT-NEXT: xori a3, a4, 1024
+; DEFAULT-NEXT: mv a5, a0
+; DEFAULT-NEXT: mv a6, a3
+; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; DEFAULT-NEXT: .LBB4_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
-; DEFAULT-NEXT: slli a5, a4, 2
-; DEFAULT-NEXT: add a5, a0, a5
; DEFAULT-NEXT: vl1re32.v v8, (a5)
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
-; DEFAULT-NEXT: add a4, a4, a1
; DEFAULT-NEXT: vs1r.v v8, (a5)
-; DEFAULT-NEXT: bne a4, a2, .LBB4_3
+; DEFAULT-NEXT: sub a6, a6, a2
+; DEFAULT-NEXT: add a5, a5, a1
+; DEFAULT-NEXT: bnez a6, .LBB4_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
-; DEFAULT-NEXT: beqz a3, .LBB4_7
+; DEFAULT-NEXT: beqz a4, .LBB4_7
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT: slli a1, a2, 2
+; DEFAULT-NEXT: slli a1, a3, 2
; DEFAULT-NEXT: add a1, a0, a1
; DEFAULT-NEXT: lui a2, 1
; DEFAULT-NEXT: add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
index c95301809375c..3cb3c94d4e1f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
@@ -7,106 +7,143 @@
define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -112
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
; CHECK-NEXT: .cfi_offset s2, -32
; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: li a7, 0
-; CHECK-NEXT: ld s2, 48(sp)
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: .cfi_offset s10, -96
+; CHECK-NEXT: .cfi_offset s11, -104
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: li a7, 8
+; CHECK-NEXT: li t0, 12
+; CHECK-NEXT: li s0, 4
+; CHECK-NEXT: li t1, 20
+; CHECK-NEXT: ld a1, 112(sp)
+; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: andi s3, a0, 1
-; CHECK-NEXT: andi t1, a2, 1
-; CHECK-NEXT: andi a6, a4, 1
+; CHECK-NEXT: andi t3, a4, 1
+; CHECK-NEXT: li t2, 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t0, 0
+; CHECK-NEXT: mv t4, t1
+; CHECK-NEXT: mv t5, t2
+; CHECK-NEXT: mv t6, t0
+; CHECK-NEXT: mv s3, a7
+; CHECK-NEXT: mv a6, s2
; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Loop Header: Depth=2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t3, 0
-; CHECK-NEXT: add t2, t0, a7
+; CHECK-NEXT: mv s5, t4
+; CHECK-NEXT: mv s6, t5
+; CHECK-NEXT: mv s7, t6
+; CHECK-NEXT: mv s8, s3
+; CHECK-NEXT: mv s4, a6
; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # => This Loop Header: Depth=3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t5, 0
-; CHECK-NEXT: add t4, t2, t3
+; CHECK-NEXT: mv s11, s5
+; CHECK-NEXT: mv a3, s6
+; CHECK-NEXT: mv ra, s7
+; CHECK-NEXT: mv a4, s8
+; CHECK-NEXT: mv s9, s4
; CHECK-NEXT: .LBB0_4: # %vector.ph.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # => This Loop Header: Depth=4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li s1, 0
-; CHECK-NEXT: add a2, t4, t5
-; CHECK-NEXT: addi a0, a2, 2
-; CHECK-NEXT: addi a3, a2, 3
-; CHECK-NEXT: addi a1, a2, 1
-; CHECK-NEXT: addi a4, a2, 5
-; CHECK-NEXT: li a5, 1
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: .LBB0_5: # %vector.body.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # Parent Loop BB0_4 Depth=4
; CHECK-NEXT: # => This Inner Loop Header: Depth=5
-; CHECK-NEXT: mv t6, s1
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: addi a5, a5, 1
-; CHECK-NEXT: slli s1, a0, 2
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: slli s0, a1, 2
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a4, a4, 1
-; CHECK-NEXT: vse32.v v8, (s1), v0.t
-; CHECK-NEXT: vse32.v v8, (s0), v0.t
-; CHECK-NEXT: addi s1, t6, 1
-; CHECK-NEXT: bnez t6, .LBB0_5
+; CHECK-NEXT: addi s1, a5, 4
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vse32.v v8, (a1), v0.t
+; CHECK-NEXT: add a5, a5, a3
+; CHECK-NEXT: vse32.v v8, (a5), v0.t
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: bne s1, s0, .LBB0_5
; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i
; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4
-; CHECK-NEXT: addi t5, t5, 1
-; CHECK-NEXT: beqz s3, .LBB0_4
+; CHECK-NEXT: addi s9, s9, 4
+; CHECK-NEXT: addi a4, a4, 4
+; CHECK-NEXT: addi ra, ra, 4
+; CHECK-NEXT: addi a3, a3, 4
+; CHECK-NEXT: andi s10, a0, 1
+; CHECK-NEXT: addi s11, s11, 4
+; CHECK-NEXT: beqz s10, .LBB0_4
; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3
-; CHECK-NEXT: addi t3, t3, 1
-; CHECK-NEXT: beqz t1, .LBB0_3
+; CHECK-NEXT: addi s4, s4, 4
+; CHECK-NEXT: addi s8, s8, 4
+; CHECK-NEXT: addi s7, s7, 4
+; CHECK-NEXT: addi s6, s6, 4
+; CHECK-NEXT: andi a1, a2, 1
+; CHECK-NEXT: addi s5, s5, 4
+; CHECK-NEXT: beqz a1, .LBB0_3
; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
-; CHECK-NEXT: addi t0, t0, 1
-; CHECK-NEXT: beqz a6, .LBB0_2
+; CHECK-NEXT: addi a6, a6, 4
+; CHECK-NEXT: addi s3, s3, 4
+; CHECK-NEXT: addi t6, t6, 4
+; CHECK-NEXT: addi t5, t5, 4
+; CHECK-NEXT: addi t4, t4, 4
+; CHECK-NEXT: beqz t3, .LBB0_2
; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: addi a7, a7, 1
-; CHECK-NEXT: beqz t1, .LBB0_1
+; CHECK-NEXT: addi s2, s2, 4
+; CHECK-NEXT: addi a7, a7, 4
+; CHECK-NEXT: addi t0, t0, 4
+; CHECK-NEXT: addi t2, t2, 4
+; CHECK-NEXT: addi t1, t1, 4
+; CHECK-NEXT: beqz a1, .LBB0_1
; CHECK-NEXT: # %bb.10: # %l.exit
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: jalr a0
-; CHECK-NEXT: beqz s3, .LBB0_12
+; CHECK-NEXT: beqz s10, .LBB0_12
; CHECK-NEXT: .LBB0_11: # %for.body7.us.14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: j .LBB0_11
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmv.s.x v8, s2
+; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v16, v8, 1
@@ -116,12 +153,20 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 7742cfc7da640..d1c98f828e76d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -243,28 +243,29 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a2, a2, 1
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: srli a2, a5, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
-; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: mv a6, a0
+; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/98328
More information about the llvm-commits mailing list