[llvm] 5ce067d - Revert "[LSR][TTI][RISCV] Disable terminator folding for RISC-V."
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 11 13:26:33 PST 2024
Author: Philip Reames
Date: 2024-01-11T13:20:02-08:00
New Revision: 5ce067d592b78fd3142364e06bae4da2a3a1e944
URL: https://github.com/llvm/llvm-project/commit/5ce067d592b78fd3142364e06bae4da2a3a1e944
DIFF: https://github.com/llvm/llvm-project/commit/5ce067d592b78fd3142364e06bae4da2a3a1e944.diff
LOG: Revert "[LSR][TTI][RISCV] Disable terminator folding for RISC-V."
This reverts commit fdb87640ee2be63af9b0e0cd943cb13d79686a03, and thus
re-enables terminator folding for RISCV. The reported miscompile has
been fixed in f5dd70c58277d925710e5a7c25c86d7565cc3c6c.
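For context (editorial note, not part of the commit itself): LSR's terminator folding rewrites a loop's exit test so it compares the strided pointer induction variable against an end value computed once in the preheader, instead of testing a separate decrementing trip counter. Below is a minimal hand-written IR sketch of that effect; the function names, values, and constants are illustrative only and are not taken from this patch.

  ; Before folding: the exit test uses a dedicated count-down IV.
  define void @count_down(ptr %a) {
  entry:
    br label %loop
  loop:
    %p = phi ptr [ %a, %entry ], [ %p.next, %loop ]
    %n = phi i64 [ 1024, %entry ], [ %n.next, %loop ]
    %v = load i32, ptr %p
    %v1 = add i32 %v, 1
    store i32 %v1, ptr %p
    %p.next = getelementptr inbounds i8, ptr %p, i64 4
    %n.next = add i64 %n, -1
    %cont = icmp ne i64 %n.next, 0
    br i1 %cont, label %loop, label %exit
  exit:
    ret void
  }

  ; After folding: the counter is gone and the exit compares %p.next
  ; against an end pointer computed once in the preheader
  ; (1024 iterations * 4 bytes; illustrative numbers).
  define void @compare_end(ptr %a) {
  entry:
    %end = getelementptr inbounds i8, ptr %a, i64 4096
    br label %loop
  loop:
    %p = phi ptr [ %a, %entry ], [ %p.next, %loop ]
    %v = load i32, ptr %p
    %v1 = add i32 %v, 1
    store i32 %v1, ptr %p
    %p.next = getelementptr inbounds i8, ptr %p, i64 4
    %cont = icmp ne ptr %p.next, %end
    br i1 %cont, label %loop, label %exit
  exit:
    ret void
  }

This is why the test updates below drop the per-iteration counter update (e.g. "addi a2, a2, -4" followed by "bnez") in favor of a "bne" against an end value materialized in a new preheader block: one fewer scalar op per iteration, at the cost of a little setup code before the loop.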
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
llvm/test/CodeGen/RISCV/branch-on-zero.ll
llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll
llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7e5dbddb5b519c..0747a778fe9a2a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -362,8 +362,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
const TargetTransformInfo::LSRCost &C2);
bool shouldFoldTerminatingConditionAfterLSR() const {
- // FIXME: Enabling this causes miscompiles.
- return false;
+ return true;
}
};
diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
index e8cd1e35936a33..02aeebdeb37757 100644
--- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
@@ -120,36 +120,45 @@ define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; RV32-LABEL: test_lshr2:
; RV32: # %bb.0: # %entry
; RV32-NEXT: srli a2, a2, 2
-; RV32-NEXT: beqz a2, .LBB3_2
-; RV32-NEXT: .LBB3_1: # %while.body
+; RV32-NEXT: beqz a2, .LBB3_3
+; RV32-NEXT: # %bb.1: # %while.body.preheader
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a2, a1, a2
+; RV32-NEXT: .LBB3_2: # %while.body
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lw a3, 0(a1)
-; RV32-NEXT: addi a1, a1, 4
+; RV32-NEXT: addi a4, a1, 4
; RV32-NEXT: slli a3, a3, 1
-; RV32-NEXT: addi a4, a0, 4
-; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: addi a1, a0, 4
; RV32-NEXT: sw a3, 0(a0)
-; RV32-NEXT: mv a0, a4
-; RV32-NEXT: bnez a2, .LBB3_1
-; RV32-NEXT: .LBB3_2: # %while.end
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: bne a4, a2, .LBB3_2
+; RV32-NEXT: .LBB3_3: # %while.end
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_lshr2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: srliw a2, a2, 2
-; RV64-NEXT: beqz a2, .LBB3_2
-; RV64-NEXT: .LBB3_1: # %while.body
+; RV64-NEXT: beqz a2, .LBB3_3
+; RV64-NEXT: # %bb.1: # %while.body.preheader
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a2, a2, 32
+; RV64-NEXT: srli a2, a2, 30
+; RV64-NEXT: add a2, a2, a1
+; RV64-NEXT: addi a2, a2, 4
+; RV64-NEXT: .LBB3_2: # %while.body
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: lw a3, 0(a1)
-; RV64-NEXT: addi a1, a1, 4
+; RV64-NEXT: addi a4, a1, 4
; RV64-NEXT: slli a3, a3, 1
-; RV64-NEXT: addi a4, a0, 4
-; RV64-NEXT: addiw a2, a2, -1
+; RV64-NEXT: addi a1, a0, 4
; RV64-NEXT: sw a3, 0(a0)
-; RV64-NEXT: mv a0, a4
-; RV64-NEXT: bnez a2, .LBB3_1
-; RV64-NEXT: .LBB3_2: # %while.end
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: mv a1, a4
+; RV64-NEXT: bne a4, a2, .LBB3_2
+; RV64-NEXT: .LBB3_3: # %while.end
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index c7454469fd69e2..31ca8eab33508c 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -8,16 +8,18 @@
define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: blez a1, .LBB0_2
-; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: blez a1, .LBB0_3
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a2, 0(a0)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB0_1
-; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
+; CHECK-NEXT: bne a0, a1, .LBB0_2
+; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
%cmp3 = icmp sgt i32 %n, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index b8b41b9e4c9166..4852850f234ba6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -206,33 +206,19 @@ define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
; Test that we pull the vlse of the constant pool out of the loop.
define dso_local void @splat_load_licm(float* %0) {
-; RV32-LABEL: splat_load_licm:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1024
-; RV32-NEXT: lui a2, 263168
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a2
-; RV32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: addi a1, a1, -4
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: bnez a1, .LBB12_1
-; RV32-NEXT: # %bb.2:
-; RV32-NEXT: ret
-;
-; RV64-LABEL: splat_load_licm:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1024
-; RV64-NEXT: lui a2, 263168
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: addiw a1, a1, -4
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: bnez a1, .LBB12_1
-; RV64-NEXT: # %bb.2:
-; RV64-NEXT: ret
+; CHECK-LABEL: splat_load_licm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 263168
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a2
+; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: bne a0, a1, .LBB12_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: ret
br label %2
2: ; preds = %2, %1
@@ -1408,3 +1394,6 @@ define <2 x double> @vid_step2_v2f64() {
; CHECK-NEXT: ret
ret <2 x double> <double 0.0, double 2.0>
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 846295b3ead27d..eeb188627577dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -13,7 +13,7 @@
define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: gather:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: addi a2, a0, 1024
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -23,10 +23,9 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -32
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bnez a2, .LBB0_1
+; CHECK-NEXT: bne a0, a2, .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -54,7 +53,7 @@ for.cond.cleanup: ; preds = %vector.body
define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
; V-LABEL: gather_masked:
; V: # %bb.0: # %entry
-; V-NEXT: li a2, 1024
+; V-NEXT: addi a2, a0, 1024
; V-NEXT: lui a3, 983765
; V-NEXT: addi a3, a3, 873
; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -69,16 +68,15 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; V-NEXT: vle8.v v10, (a0)
; V-NEXT: vadd.vv v9, v10, v9
; V-NEXT: vse8.v v9, (a0)
-; V-NEXT: addi a2, a2, -32
; V-NEXT: addi a0, a0, 32
; V-NEXT: addi a1, a1, 160
-; V-NEXT: bnez a2, .LBB1_1
+; V-NEXT: bne a0, a2, .LBB1_1
; V-NEXT: # %bb.2: # %for.cond.cleanup
; V-NEXT: ret
;
; ZVE32F-LABEL: gather_masked:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 1024
+; ZVE32F-NEXT: addi a2, a0, 1024
; ZVE32F-NEXT: lui a3, 983765
; ZVE32F-NEXT: addi a3, a3, 873
; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -93,10 +91,9 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; ZVE32F-NEXT: vle8.v v10, (a0)
; ZVE32F-NEXT: vadd.vv v9, v10, v9
; ZVE32F-NEXT: vse8.v v9, (a0)
-; ZVE32F-NEXT: addi a2, a2, -32
; ZVE32F-NEXT: addi a0, a0, 32
; ZVE32F-NEXT: addi a1, a1, 160
-; ZVE32F-NEXT: bnez a2, .LBB1_1
+; ZVE32F-NEXT: bne a0, a2, .LBB1_1
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
; ZVE32F-NEXT: ret
entry:
@@ -125,7 +122,7 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt
; CHECK-LABEL: gather_negative_stride:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi a1, a1, 155
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: addi a2, a0, 1024
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: li a3, -5
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -135,10 +132,9 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -32
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bnez a2, .LBB2_1
+; CHECK-NEXT: bne a0, a2, .LBB2_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -166,7 +162,7 @@ for.cond.cleanup: ; preds = %vector.body
define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: gather_zero_stride:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: addi a2, a0, 1024
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: .LBB3_1: # %vector.body
@@ -175,10 +171,9 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a3
; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -32
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bnez a2, .LBB3_1
+; CHECK-NEXT: bne a0, a2, .LBB3_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -206,7 +201,7 @@ for.cond.cleanup: ; preds = %vector.body
define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; V-LABEL: gather_zero_stride_unfold:
; V: # %bb.0: # %entry
-; V-NEXT: li a2, 1024
+; V-NEXT: addi a2, a0, 1024
; V-NEXT: li a3, 32
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; V-NEXT: .LBB4_1: # %vector.body
@@ -215,16 +210,15 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
; V-NEXT: vle8.v v9, (a0)
; V-NEXT: vdivu.vv v8, v8, v9
; V-NEXT: vse8.v v8, (a0)
-; V-NEXT: addi a2, a2, -32
; V-NEXT: addi a0, a0, 32
; V-NEXT: addi a1, a1, 160
-; V-NEXT: bnez a2, .LBB4_1
+; V-NEXT: bne a0, a2, .LBB4_1
; V-NEXT: # %bb.2: # %for.cond.cleanup
; V-NEXT: ret
;
; ZVE32F-LABEL: gather_zero_stride_unfold:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 1024
+; ZVE32F-NEXT: addi a2, a0, 1024
; ZVE32F-NEXT: li a3, 32
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; ZVE32F-NEXT: .LBB4_1: # %vector.body
@@ -233,16 +227,15 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
; ZVE32F-NEXT: vle8.v v9, (a0)
; ZVE32F-NEXT: vdivu.vv v8, v8, v9
; ZVE32F-NEXT: vse8.v v8, (a0)
-; ZVE32F-NEXT: addi a2, a2, -32
; ZVE32F-NEXT: addi a0, a0, 32
; ZVE32F-NEXT: addi a1, a1, 160
-; ZVE32F-NEXT: bnez a2, .LBB4_1
+; ZVE32F-NEXT: bne a0, a2, .LBB4_1
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
; ZVE32F-NEXT: ret
;
; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold:
; NOT-OPTIMIZED: # %bb.0: # %entry
-; NOT-OPTIMIZED-NEXT: li a2, 1024
+; NOT-OPTIMIZED-NEXT: addi a2, a0, 1024
; NOT-OPTIMIZED-NEXT: li a3, 32
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body
@@ -252,10 +245,9 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a3
; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8
; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
-; NOT-OPTIMIZED-NEXT: addi a2, a2, -32
; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
-; NOT-OPTIMIZED-NEXT: bnez a2, .LBB4_1
+; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB4_1
; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
; NOT-OPTIMIZED-NEXT: ret
entry:
@@ -287,7 +279,7 @@ for.cond.cleanup: ; preds = %vector.body
define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: scatter:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: addi a2, a1, 1024
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -297,10 +289,9 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B
; CHECK-NEXT: vlse8.v v9, (a0), a3
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsse8.v v8, (a0), a3
-; CHECK-NEXT: addi a2, a2, -32
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: addi a0, a0, 160
-; CHECK-NEXT: bnez a2, .LBB5_1
+; CHECK-NEXT: bne a1, a2, .LBB5_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -328,7 +319,7 @@ for.cond.cleanup: ; preds = %vector.body
define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
; V-LABEL: scatter_masked:
; V: # %bb.0: # %entry
-; V-NEXT: li a2, 1024
+; V-NEXT: addi a2, a1, 1024
; V-NEXT: li a3, 32
; V-NEXT: lui a4, 983765
; V-NEXT: addi a4, a4, 873
@@ -343,16 +334,15 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; V-NEXT: vlse8.v v10, (a0), a4, v0.t
; V-NEXT: vadd.vv v9, v10, v9
; V-NEXT: vsse8.v v9, (a0), a4, v0.t
-; V-NEXT: addi a2, a2, -32
; V-NEXT: addi a1, a1, 32
; V-NEXT: addi a0, a0, 160
-; V-NEXT: bnez a2, .LBB6_1
+; V-NEXT: bne a1, a2, .LBB6_1
; V-NEXT: # %bb.2: # %for.cond.cleanup
; V-NEXT: ret
;
; ZVE32F-LABEL: scatter_masked:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 1024
+; ZVE32F-NEXT: addi a2, a1, 1024
; ZVE32F-NEXT: li a3, 32
; ZVE32F-NEXT: lui a4, 983765
; ZVE32F-NEXT: addi a4, a4, 873
@@ -367,10 +357,9 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; ZVE32F-NEXT: vlse8.v v10, (a0), a4, v0.t
; ZVE32F-NEXT: vadd.vv v9, v10, v9
; ZVE32F-NEXT: vsse8.v v9, (a0), a4, v0.t
-; ZVE32F-NEXT: addi a2, a2, -32
; ZVE32F-NEXT: addi a1, a1, 32
; ZVE32F-NEXT: addi a0, a0, 160
-; ZVE32F-NEXT: bnez a2, .LBB6_1
+; ZVE32F-NEXT: bne a1, a2, .LBB6_1
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
; ZVE32F-NEXT: ret
entry:
@@ -402,7 +391,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: gather_pow2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: .LBB7_1: # %vector.body
@@ -415,10 +405,9 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -8
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 128
-; CHECK-NEXT: bnez a2, .LBB7_1
+; CHECK-NEXT: bne a0, a2, .LBB7_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -450,7 +439,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: scatter_pow2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB8_1: # %vector.body
@@ -461,10 +451,9 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
; CHECK-NEXT: vlse32.v v9, (a0), a4
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsse32.v v8, (a0), a4
-; CHECK-NEXT: addi a2, a2, -8
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: bnez a2, .LBB8_1
+; CHECK-NEXT: bne a1, a2, .LBB8_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -501,7 +490,8 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-LABEL: struct_gather:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi a1, a1, 132
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: .LBB9_1: # %vector.body
@@ -516,10 +506,9 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: vadd.vv v9, v11, v9
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: vse32.v v9, (a4)
-; CHECK-NEXT: addi a2, a2, -16
; CHECK-NEXT: addi a0, a0, 64
; CHECK-NEXT: addi a1, a1, 256
-; CHECK-NEXT: bnez a2, .LBB9_1
+; CHECK-NEXT: bne a0, a2, .LBB9_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -650,7 +639,8 @@ declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32 immarg, <
define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) {
; V-LABEL: gather_of_pointers:
; V: # %bb.0: # %bb
-; V-NEXT: li a2, 1024
+; V-NEXT: lui a2, 2
+; V-NEXT: add a2, a0, a2
; V-NEXT: li a3, 40
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: .LBB11_1: # %bb2
@@ -661,22 +651,22 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
; V-NEXT: addi a4, a0, 16
; V-NEXT: vse64.v v8, (a0)
; V-NEXT: vse64.v v9, (a4)
-; V-NEXT: addi a2, a2, -4
; V-NEXT: addi a0, a0, 32
; V-NEXT: addi a1, a1, 160
-; V-NEXT: bnez a2, .LBB11_1
+; V-NEXT: bne a0, a2, .LBB11_1
; V-NEXT: # %bb.2: # %bb18
; V-NEXT: ret
;
; ZVE32F-LABEL: gather_of_pointers:
; ZVE32F: # %bb.0: # %bb
; ZVE32F-NEXT: li a2, 0
-; ZVE32F-NEXT: li a3, 1
-; ZVE32F-NEXT: li a4, 1024
+; ZVE32F-NEXT: lui a3, 2
+; ZVE32F-NEXT: add a3, a0, a3
+; ZVE32F-NEXT: li a4, 1
; ZVE32F-NEXT: li a5, 40
; ZVE32F-NEXT: .LBB11_1: # %bb2
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT: mul a6, a3, a5
+; ZVE32F-NEXT: mul a6, a4, a5
; ZVE32F-NEXT: add a6, a1, a6
; ZVE32F-NEXT: mul a7, a2, a5
; ZVE32F-NEXT: add a7, a1, a7
@@ -689,10 +679,9 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
; ZVE32F-NEXT: sd a6, 24(a0)
; ZVE32F-NEXT: sd a7, 16(a0)
; ZVE32F-NEXT: addi a2, a2, 4
-; ZVE32F-NEXT: addi a3, a3, 4
-; ZVE32F-NEXT: addi a4, a4, -4
; ZVE32F-NEXT: addi a0, a0, 32
-; ZVE32F-NEXT: bnez a4, .LBB11_1
+; ZVE32F-NEXT: addi a4, a4, 4
+; ZVE32F-NEXT: bne a0, a3, .LBB11_1
; ZVE32F-NEXT: # %bb.2: # %bb18
; ZVE32F-NEXT: ret
bb:
@@ -727,7 +716,8 @@ declare <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr>, i32 immarg, <2 x i1>,
define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) {
; V-LABEL: scatter_of_pointers:
; V: # %bb.0: # %bb
-; V-NEXT: li a2, 1024
+; V-NEXT: lui a2, 2
+; V-NEXT: add a2, a1, a2
; V-NEXT: li a3, 40
; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: .LBB12_1: # %bb2
@@ -738,18 +728,18 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
; V-NEXT: addi a4, a0, 80
; V-NEXT: vsse64.v v8, (a0), a3
; V-NEXT: vsse64.v v9, (a4), a3
-; V-NEXT: addi a2, a2, -4
; V-NEXT: addi a1, a1, 32
; V-NEXT: addi a0, a0, 160
-; V-NEXT: bnez a2, .LBB12_1
+; V-NEXT: bne a1, a2, .LBB12_1
; V-NEXT: # %bb.2: # %bb18
; V-NEXT: ret
;
; ZVE32F-LABEL: scatter_of_pointers:
; ZVE32F: # %bb.0: # %bb
; ZVE32F-NEXT: li a2, 0
-; ZVE32F-NEXT: li a3, 1
-; ZVE32F-NEXT: li a4, 1024
+; ZVE32F-NEXT: lui a3, 2
+; ZVE32F-NEXT: add a3, a1, a3
+; ZVE32F-NEXT: li a4, 1
; ZVE32F-NEXT: li a5, 40
; ZVE32F-NEXT: .LBB12_1: # %bb2
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
@@ -757,7 +747,7 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
; ZVE32F-NEXT: ld a7, 0(a1)
; ZVE32F-NEXT: ld t0, 24(a1)
; ZVE32F-NEXT: ld t1, 16(a1)
-; ZVE32F-NEXT: mul t2, a3, a5
+; ZVE32F-NEXT: mul t2, a4, a5
; ZVE32F-NEXT: add t2, a0, t2
; ZVE32F-NEXT: mul t3, a2, a5
; ZVE32F-NEXT: add t3, a0, t3
@@ -766,10 +756,9 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
; ZVE32F-NEXT: sd t1, 80(t3)
; ZVE32F-NEXT: sd t0, 80(t2)
; ZVE32F-NEXT: addi a2, a2, 4
-; ZVE32F-NEXT: addi a3, a3, 4
-; ZVE32F-NEXT: addi a4, a4, -4
; ZVE32F-NEXT: addi a1, a1, 32
-; ZVE32F-NEXT: bnez a4, .LBB12_1
+; ZVE32F-NEXT: addi a4, a4, 4
+; ZVE32F-NEXT: bne a1, a3, .LBB12_1
; ZVE32F-NEXT: # %bb.2: # %bb18
; ZVE32F-NEXT: ret
bb:
@@ -806,53 +795,56 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: beq a2, a3, .LBB13_7
; CHECK-NEXT: # %bb.1: # %bb3
-; CHECK-NEXT: li a4, 1023
-; CHECK-NEXT: subw a4, a4, a2
-; CHECK-NEXT: li a5, 31
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: bltu a4, a5, .LBB13_5
+; CHECK-NEXT: li a3, 1023
+; CHECK-NEXT: subw a5, a3, a2
+; CHECK-NEXT: li a6, 31
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltu a5, a6, .LBB13_5
; CHECK-NEXT: # %bb.2: # %bb9
-; CHECK-NEXT: slli a4, a4, 32
-; CHECK-NEXT: srli a4, a4, 32
-; CHECK-NEXT: addi a4, a4, 1
-; CHECK-NEXT: andi a5, a4, -32
-; CHECK-NEXT: add a3, a5, a2
-; CHECK-NEXT: slli a7, a2, 2
-; CHECK-NEXT: add a6, a0, a2
+; CHECK-NEXT: slli a5, a5, 32
+; CHECK-NEXT: srli a5, a5, 32
+; CHECK-NEXT: addi a5, a5, 1
+; CHECK-NEXT: andi a6, a5, -32
+; CHECK-NEXT: add a4, a6, a2
+; CHECK-NEXT: slli t0, a2, 2
+; CHECK-NEXT: add a7, a0, a2
; CHECK-NEXT: add a2, a1, a2
-; CHECK-NEXT: add a2, a2, a7
-; CHECK-NEXT: li t0, 32
-; CHECK-NEXT: li a7, 5
-; CHECK-NEXT: vsetvli zero, t0, e8, m1, ta, ma
-; CHECK-NEXT: mv t0, a5
+; CHECK-NEXT: add a2, a2, t0
+; CHECK-NEXT: add t0, a4, a0
+; CHECK-NEXT: li t2, 32
+; CHECK-NEXT: li t1, 5
+; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma
; CHECK-NEXT: .LBB13_3: # %bb15
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vlse8.v v8, (a2), a7
-; CHECK-NEXT: vle8.v v9, (a6)
+; CHECK-NEXT: vlse8.v v8, (a2), t1
+; CHECK-NEXT: vle8.v v9, (a7)
; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vse8.v v8, (a6)
-; CHECK-NEXT: addi t0, t0, -32
-; CHECK-NEXT: addi a6, a6, 32
+; CHECK-NEXT: vse8.v v8, (a7)
+; CHECK-NEXT: addi a7, a7, 32
; CHECK-NEXT: addi a2, a2, 160
-; CHECK-NEXT: bnez t0, .LBB13_3
+; CHECK-NEXT: bne a7, t0, .LBB13_3
; CHECK-NEXT: # %bb.4: # %bb30
-; CHECK-NEXT: beq a4, a5, .LBB13_7
+; CHECK-NEXT: beq a5, a6, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %bb32
-; CHECK-NEXT: addi a2, a3, -1024
-; CHECK-NEXT: add a0, a0, a3
-; CHECK-NEXT: slli a4, a3, 2
-; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: add a2, a0, a4
+; CHECK-NEXT: slli a5, a4, 2
; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: add a1, a1, a5
+; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: slli a3, a3, 32
+; CHECK-NEXT: srli a3, a3, 32
+; CHECK-NEXT: add a0, a4, a0
+; CHECK-NEXT: add a0, a0, a3
+; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: .LBB13_6: # %bb35
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lbu a3, 0(a1)
-; CHECK-NEXT: lbu a4, 0(a0)
+; CHECK-NEXT: lbu a4, 0(a2)
; CHECK-NEXT: add a3, a4, a3
-; CHECK-NEXT: sb a3, 0(a0)
-; CHECK-NEXT: addiw a2, a2, 1
-; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: sb a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: addi a1, a1, 5
-; CHECK-NEXT: bnez a2, .LBB13_6
+; CHECK-NEXT: bne a2, a0, .LBB13_6
; CHECK-NEXT: .LBB13_7: # %bb34
; CHECK-NEXT: ret
bb:
@@ -926,6 +918,10 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: beqz a2, .LBB14_3
; CHECK-NEXT: # %bb.1: # %bb2
+; CHECK-NEXT: addi a2, a2, -16
+; CHECK-NEXT: andi a2, a2, -16
+; CHECK-NEXT: add a2, a2, a0
+; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
; CHECK-NEXT: .LBB14_2: # %bb4
@@ -934,10 +930,9 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -16
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: addi a1, a1, 80
-; CHECK-NEXT: bnez a2, .LBB14_2
+; CHECK-NEXT: bne a0, a2, .LBB14_2
; CHECK-NEXT: .LBB14_3: # %bb16
; CHECK-NEXT: ret
bb:
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll
index 77cf6f6a25ee69..350c888a2c7d69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll
@@ -12,7 +12,7 @@ define void @sink_splat_vp_and_i1(ptr nocapture %a, i1 zeroext %x, <8 x i1> %m,
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: addi a1, a0, 1024
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vlm.v v9, (a0)
@@ -20,9 +20,8 @@ define void @sink_splat_vp_and_i1(ptr nocapture %a, i1 zeroext %x, <8 x i1> %m,
; CHECK-NEXT: vmand.mm v9, v9, v8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vsm.v v9, (a0)
-; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: bnez a1, .LBB0_1
+; CHECK-NEXT: bne a0, a1, .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 9b083fc286e7c0..ede331cc376f89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5,16 +5,16 @@
define void @sink_splat_mul(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB0_1
+; CHECK-NEXT: bne a0, a2, .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -39,16 +39,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_add(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB1_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB1_1
+; CHECK-NEXT: bne a0, a2, .LBB1_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -73,16 +73,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_sub(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB2_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB2_1
+; CHECK-NEXT: bne a0, a2, .LBB2_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -107,16 +107,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_rsub(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB3_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB3_1
+; CHECK-NEXT: bne a0, a2, .LBB3_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -141,16 +141,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_and(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB4_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB4_1
+; CHECK-NEXT: bne a0, a2, .LBB4_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -175,16 +175,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_or(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB5_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB5_1
+; CHECK-NEXT: bne a0, a2, .LBB5_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -209,16 +209,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_xor(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB6_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB6_1
+; CHECK-NEXT: bne a0, a2, .LBB6_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -244,42 +244,42 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB7_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB7_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB7_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB7_7
; CHECK-NEXT: .LBB7_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB7_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: mul a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB7_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: mul a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -335,42 +335,42 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB8_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB8_5
; CHECK-NEXT: .LBB8_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB8_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB8_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB8_7
; CHECK-NEXT: .LBB8_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB8_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: add a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB8_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB8_6
; CHECK-NEXT: .LBB8_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -426,42 +426,42 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB9_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB9_5
; CHECK-NEXT: .LBB9_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB9_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB9_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB9_7
; CHECK-NEXT: .LBB9_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB9_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: add a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB9_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB9_6
; CHECK-NEXT: .LBB9_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -517,42 +517,42 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB10_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB10_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB10_5
; CHECK-NEXT: .LBB10_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB10_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB10_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB10_7
; CHECK-NEXT: .LBB10_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB10_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: subw a2, a1, a2
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB10_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: subw a3, a1, a3
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB10_6
; CHECK-NEXT: .LBB10_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -608,42 +608,42 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB11_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB11_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB11_5
; CHECK-NEXT: .LBB11_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB11_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB11_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB11_7
; CHECK-NEXT: .LBB11_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB11_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: and a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB11_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: and a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB11_6
; CHECK-NEXT: .LBB11_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -699,42 +699,42 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB12_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB12_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB12_5
; CHECK-NEXT: .LBB12_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB12_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB12_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB12_7
; CHECK-NEXT: .LBB12_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB12_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: or a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB12_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: or a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB12_6
; CHECK-NEXT: .LBB12_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -790,42 +790,42 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB13_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB13_5
; CHECK-NEXT: .LBB13_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB13_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB13_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB13_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: xor a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB13_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: xor a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB13_6
; CHECK-NEXT: .LBB13_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -880,16 +880,16 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_shl(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB14_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB14_1
+; CHECK-NEXT: bne a0, a2, .LBB14_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -914,16 +914,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_lshr(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB15_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB15_1
+; CHECK-NEXT: bne a0, a2, .LBB15_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -948,16 +948,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_ashr(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_ashr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB16_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsra.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB16_1
+; CHECK-NEXT: bne a0, a2, .LBB16_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -983,42 +983,42 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB17_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB17_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB17_5
; CHECK-NEXT: .LBB17_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB17_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB17_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB17_7
; CHECK-NEXT: .LBB17_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB17_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: sllw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB17_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: sllw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB17_6
; CHECK-NEXT: .LBB17_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1074,42 +1074,42 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB18_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB18_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB18_5
; CHECK-NEXT: .LBB18_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB18_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB18_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB18_7
; CHECK-NEXT: .LBB18_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB18_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: srlw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB18_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: srlw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB18_6
; CHECK-NEXT: .LBB18_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1190,17 +1190,17 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) {
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a3, .LBB19_7
; CHECK-NEXT: .LBB19_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a1, -1024
; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB19_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a1, 0(a0)
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: sw a1, 0(a0)
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a2, .LBB19_6
+; CHECK-NEXT: lw a2, 0(a1)
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB19_6
; CHECK-NEXT: .LBB19_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1255,16 +1255,16 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_fmul(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB20_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB20_1
+; CHECK-NEXT: bne a0, a1, .LBB20_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1289,16 +1289,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_fdiv(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB21_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB21_1
+; CHECK-NEXT: bne a0, a1, .LBB21_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1323,16 +1323,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_frdiv(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB22_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB22_1
+; CHECK-NEXT: bne a0, a1, .LBB22_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1357,16 +1357,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_fadd(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB23_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfadd.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB23_1
+; CHECK-NEXT: bne a0, a1, .LBB23_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1391,16 +1391,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_fsub(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB24_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfsub.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB24_1
+; CHECK-NEXT: bne a0, a1, .LBB24_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1425,16 +1425,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_frsub(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB25_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB25_1
+; CHECK-NEXT: bne a0, a1, .LBB25_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1460,41 +1460,41 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB26_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB26_5
; CHECK-NEXT: .LBB26_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB26_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB26_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB26_7
; CHECK-NEXT: .LBB26_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB26_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fmul.s fa5, fa5, fa0
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB26_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB26_6
; CHECK-NEXT: .LBB26_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1550,41 +1550,41 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB27_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB27_5
; CHECK-NEXT: .LBB27_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB27_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB27_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB27_7
; CHECK-NEXT: .LBB27_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB27_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fdiv.s fa5, fa5, fa0
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB27_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB27_6
; CHECK-NEXT: .LBB27_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1640,41 +1640,41 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB28_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB28_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB28_5
; CHECK-NEXT: .LBB28_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB28_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB28_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB28_7
; CHECK-NEXT: .LBB28_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB28_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fdiv.s fa5, fa0, fa5
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB28_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB28_6
; CHECK-NEXT: .LBB28_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1730,41 +1730,41 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB29_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB29_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB29_5
; CHECK-NEXT: .LBB29_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB29_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfadd.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB29_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB29_7
; CHECK-NEXT: .LBB29_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB29_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fadd.s fa5, fa5, fa0
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB29_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB29_6
; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1820,41 +1820,41 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB30_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB30_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB30_5
; CHECK-NEXT: .LBB30_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB30_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB30_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB30_7
; CHECK-NEXT: .LBB30_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB30_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fsub.s fa5, fa5, fa0
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB30_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB30_6
; CHECK-NEXT: .LBB30_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1910,41 +1910,41 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB31_2
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB31_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB31_5
; CHECK-NEXT: .LBB31_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a2
+; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB31_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a3
+; CHECK-NEXT: sub a6, a6, a2
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB31_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB31_7
; CHECK-NEXT: .LBB31_5: # %for.body.preheader
-; CHECK-NEXT: addi a1, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: .LBB31_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: flw fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 0(a1)
; CHECK-NEXT: fsub.s fa5, fa0, fa5
-; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a1, .LBB31_6
+; CHECK-NEXT: fsw fa5, 0(a1)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: bne a1, a0, .LBB31_6
; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -1999,7 +1999,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB32_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2007,10 +2008,9 @@ define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b,
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB32_1
+; CHECK-NEXT: bne a1, a2, .LBB32_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2037,7 +2037,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
; CHECK-LABEL: sink_splat_fma_commute:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB33_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2045,10 +2046,9 @@ define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture read
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB33_1
+; CHECK-NEXT: bne a1, a2, .LBB33_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2103,20 +2103,20 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB34_7
; CHECK-NEXT: .LBB34_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a4, -1024
; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: add a2, a1, a4
; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB34_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a0)
-; CHECK-NEXT: flw fa4, 0(a1)
+; CHECK-NEXT: flw fa4, 0(a2)
; CHECK-NEXT: fmadd.s fa5, fa5, fa0, fa4
; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a2, .LBB34_6
+; CHECK-NEXT: bne a2, a1, .LBB34_6
; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2203,20 +2203,20 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB35_7
; CHECK-NEXT: .LBB35_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a4, -1024
; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: add a2, a1, a4
; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB35_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: flw fa5, 0(a0)
-; CHECK-NEXT: flw fa4, 0(a1)
+; CHECK-NEXT: flw fa4, 0(a2)
; CHECK-NEXT: fmadd.s fa5, fa0, fa5, fa4
; CHECK-NEXT: fsw fa5, 0(a0)
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a2, .LBB35_6
+; CHECK-NEXT: bne a2, a1, .LBB35_6
; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2280,7 +2280,8 @@ declare float @llvm.fma.f32(float, float, float)
define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
; CHECK-LABEL: sink_splat_icmp:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB36_1: # %vector.body
@@ -2288,9 +2289,8 @@ define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmseq.vx v0, v9, a1
; CHECK-NEXT: vse32.v v8, (a0), v0.t
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB36_1
+; CHECK-NEXT: bne a0, a2, .LBB36_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2316,7 +2316,8 @@ declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
; CHECK-LABEL: sink_splat_fcmp:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB37_1: # %vector.body
@@ -2324,9 +2325,8 @@ define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmfeq.vf v0, v9, fa0
; CHECK-NEXT: vse32.v v8, (a0), v0.t
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB37_1
+; CHECK-NEXT: bne a0, a1, .LBB37_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2352,16 +2352,16 @@ declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
define void @sink_splat_udiv(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB38_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB38_1
+; CHECK-NEXT: bne a0, a2, .LBB38_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2386,16 +2386,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_sdiv(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB39_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB39_1
+; CHECK-NEXT: bne a0, a2, .LBB39_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2420,16 +2420,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_urem(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_urem:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB40_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB40_1
+; CHECK-NEXT: bne a0, a2, .LBB40_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2454,16 +2454,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_srem(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_srem:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB41_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB41_1
+; CHECK-NEXT: bne a0, a2, .LBB41_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2489,42 +2489,42 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB42_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB42_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB42_5
; CHECK-NEXT: .LBB42_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB42_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB42_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB42_7
; CHECK-NEXT: .LBB42_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB42_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: divuw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB42_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: divuw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB42_6
; CHECK-NEXT: .LBB42_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2580,42 +2580,42 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB43_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB43_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB43_5
; CHECK-NEXT: .LBB43_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB43_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB43_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB43_7
; CHECK-NEXT: .LBB43_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB43_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: divw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB43_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: divw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB43_6
; CHECK-NEXT: .LBB43_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2671,42 +2671,42 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_urem_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB44_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB44_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB44_5
; CHECK-NEXT: .LBB44_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB44_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB44_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB44_7
; CHECK-NEXT: .LBB44_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB44_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: remuw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB44_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: remuw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB44_6
; CHECK-NEXT: .LBB44_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2762,42 +2762,42 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_srem_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a3, a5, 1
-; CHECK-NEXT: li a2, 1024
-; CHECK-NEXT: bgeu a2, a3, .LBB45_2
+; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB45_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB45_5
; CHECK-NEXT: .LBB45_2: # %vector.ph
-; CHECK-NEXT: addi a2, a3, -1
-; CHECK-NEXT: andi a4, a2, 1024
-; CHECK-NEXT: xori a2, a4, 1024
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: mv a7, a3
; CHECK-NEXT: .LBB45_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a3
+; CHECK-NEXT: sub a7, a7, a2
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB45_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB45_7
; CHECK-NEXT: .LBB45_5: # %for.body.preheader
-; CHECK-NEXT: addi a3, a2, -1024
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: .LBB45_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: remw a2, a2, a1
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a3, .LBB45_6
+; CHECK-NEXT: lw a3, 0(a2)
+; CHECK-NEXT: remw a3, a3, a1
+; CHECK-NEXT: sw a3, 0(a2)
+; CHECK-NEXT: addi a2, a2, 4
+; CHECK-NEXT: bne a2, a0, .LBB45_6
; CHECK-NEXT: .LBB45_7: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2854,7 +2854,8 @@ declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_mul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB46_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2863,9 +2864,8 @@ define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i3
; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB46_1
+; CHECK-NEXT: bne a0, a3, .LBB46_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2892,7 +2892,8 @@ declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB47_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2901,9 +2902,8 @@ define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i3
; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB47_1
+; CHECK-NEXT: bne a0, a3, .LBB47_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2928,7 +2928,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add_commute:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB48_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2937,9 +2938,8 @@ define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1
; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB48_1
+; CHECK-NEXT: bne a0, a3, .LBB48_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -2966,7 +2966,8 @@ declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_sub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB49_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2975,9 +2976,8 @@ define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i3
; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB49_1
+; CHECK-NEXT: bne a0, a3, .LBB49_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3002,7 +3002,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_rsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB50_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3011,9 +3012,8 @@ define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB50_1
+; CHECK-NEXT: bne a0, a3, .LBB50_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3040,7 +3040,8 @@ declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_shl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB51_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3049,9 +3050,8 @@ define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i3
; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB51_1
+; CHECK-NEXT: bne a0, a3, .LBB51_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3078,7 +3078,8 @@ declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_lshr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB52_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3087,9 +3088,8 @@ define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB52_1
+; CHECK-NEXT: bne a0, a3, .LBB52_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3116,7 +3116,8 @@ declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_ashr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB53_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3125,9 +3126,8 @@ define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB53_1
+; CHECK-NEXT: bne a0, a3, .LBB53_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3154,7 +3154,8 @@ declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fmul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB54_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3163,9 +3164,8 @@ define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zer
; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB54_1
+; CHECK-NEXT: bne a0, a2, .LBB54_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3192,7 +3192,8 @@ declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB55_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3201,9 +3202,8 @@ define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zer
; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB55_1
+; CHECK-NEXT: bne a0, a2, .LBB55_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3228,7 +3228,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_frdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB56_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3237,9 +3238,8 @@ define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 ze
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB56_1
+; CHECK-NEXT: bne a0, a2, .LBB56_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3266,7 +3266,8 @@ declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fadd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB57_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3275,9 +3276,8 @@ define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zer
; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB57_1
+; CHECK-NEXT: bne a0, a2, .LBB57_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3304,7 +3304,8 @@ declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB58_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3313,9 +3314,8 @@ define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zer
; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB58_1
+; CHECK-NEXT: bne a0, a2, .LBB58_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3342,7 +3342,8 @@ declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32
define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_frsub:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB59_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3351,9 +3352,8 @@ define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 ze
; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB59_1
+; CHECK-NEXT: bne a0, a2, .LBB59_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3380,7 +3380,8 @@ declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_udiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB60_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3389,9 +3390,8 @@ define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB60_1
+; CHECK-NEXT: bne a0, a3, .LBB60_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3418,7 +3418,8 @@ declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_sdiv:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB61_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3427,9 +3428,8 @@ define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB61_1
+; CHECK-NEXT: bne a0, a3, .LBB61_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3456,7 +3456,8 @@ declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_urem:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB62_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3465,9 +3466,8 @@ define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB62_1
+; CHECK-NEXT: bne a0, a3, .LBB62_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3494,7 +3494,8 @@ declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_srem:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB63_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3503,9 +3504,8 @@ define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i
; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB63_1
+; CHECK-NEXT: bne a0, a3, .LBB63_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3534,7 +3534,8 @@ define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: li a1, 1024
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: .LBB64_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v9, (a0)
@@ -3542,9 +3543,8 @@ define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i
; CHECK-NEXT: vrem.vv v9, v8, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a1, .LBB64_1
+; CHECK-NEXT: bne a0, a1, .LBB64_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3571,7 +3571,8 @@ declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4
define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fma:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a1, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB65_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3581,10 +3582,9 @@ define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB65_1
+; CHECK-NEXT: bne a1, a3, .LBB65_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3611,7 +3611,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fma_commute:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a1, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB66_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3621,10 +3622,9 @@ define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture r
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB66_1
+; CHECK-NEXT: bne a1, a3, .LBB66_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3652,16 +3652,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB67_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB67_1
+; CHECK-NEXT: bne a0, a2, .LBB67_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3686,16 +3686,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_add_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_add_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB68_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB68_1
+; CHECK-NEXT: bne a0, a2, .LBB68_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3720,16 +3720,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_sub_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB69_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB69_1
+; CHECK-NEXT: bne a0, a2, .LBB69_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3754,16 +3754,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_rsub_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB70_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB70_1
+; CHECK-NEXT: bne a0, a2, .LBB70_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3788,16 +3788,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_and_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_and_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB71_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB71_1
+; CHECK-NEXT: bne a0, a2, .LBB71_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3822,16 +3822,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_or_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_or_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB72_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB72_1
+; CHECK-NEXT: bne a0, a2, .LBB72_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3856,16 +3856,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_xor_lmul2(ptr nocapture %a, i64 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmul2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: .LBB73_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB73_1
+; CHECK-NEXT: bne a0, a2, .LBB73_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3890,7 +3890,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB74_1: # %vector.body
@@ -3898,9 +3899,8 @@ define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB74_1
+; CHECK-NEXT: bne a0, a2, .LBB74_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3925,7 +3925,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB75_1: # %vector.body
@@ -3933,9 +3934,8 @@ define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB75_1
+; CHECK-NEXT: bne a0, a2, .LBB75_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3960,7 +3960,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB76_1: # %vector.body
@@ -3968,9 +3969,8 @@ define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB76_1
+; CHECK-NEXT: bne a0, a2, .LBB76_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -3995,7 +3995,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB77_1: # %vector.body
@@ -4003,9 +4004,8 @@ define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB77_1
+; CHECK-NEXT: bne a0, a2, .LBB77_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4030,7 +4030,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB78_1: # %vector.body
@@ -4038,9 +4039,8 @@ define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB78_1
+; CHECK-NEXT: bne a0, a2, .LBB78_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4065,7 +4065,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB79_1: # %vector.body
@@ -4073,9 +4074,8 @@ define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB79_1
+; CHECK-NEXT: bne a0, a2, .LBB79_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4100,7 +4100,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmul8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: .LBB80_1: # %vector.body
@@ -4108,9 +4109,8 @@ define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB80_1
+; CHECK-NEXT: bne a0, a2, .LBB80_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4135,16 +4135,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB81_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB81_1
+; CHECK-NEXT: bne a0, a2, .LBB81_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4169,16 +4169,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_add_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB82_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB82_1
+; CHECK-NEXT: bne a0, a2, .LBB82_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4203,16 +4203,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_sub_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB83_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB83_1
+; CHECK-NEXT: bne a0, a2, .LBB83_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4237,16 +4237,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_rsub_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB84_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB84_1
+; CHECK-NEXT: bne a0, a2, .LBB84_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4271,16 +4271,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_and_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB85_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB85_1
+; CHECK-NEXT: bne a0, a2, .LBB85_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4305,16 +4305,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_or_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB86_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB86_1
+; CHECK-NEXT: bne a0, a2, .LBB86_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4339,16 +4339,16 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_xor_lmulmf2(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_lmulmf2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB87_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 32
-; CHECK-NEXT: bnez a2, .LBB87_1
+; CHECK-NEXT: bne a0, a2, .LBB87_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4376,7 +4376,8 @@ define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i
; CHECK-LABEL: sink_splat_vp_icmp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: .LBB88_1: # %vector.body
@@ -4387,9 +4388,8 @@ define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i
; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0), v0.t
-; CHECK-NEXT: addi a3, a3, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a3, .LBB88_1
+; CHECK-NEXT: bne a0, a3, .LBB88_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
@@ -4417,7 +4417,8 @@ define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zer
; CHECK-LABEL: sink_splat_vp_fcmp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: .LBB89_1: # %vector.body
@@ -4428,9 +4429,8 @@ define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zer
; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0), v0.t
-; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB89_1
+; CHECK-NEXT: bne a0, a2, .LBB89_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 07fcec13146cbb..ff39bd2580b7fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -951,16 +951,15 @@ if.end:
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a1, 100
+; CHECK-NEXT: addi a1, a0, 800
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB22_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf4 v9, v8
; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 8
-; CHECK-NEXT: bnez a1, .LBB22_1
+; CHECK-NEXT: bne a0, a1, .LBB22_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
index 38a754d1faa884..9c11bd064ad47c 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
@@ -8,15 +8,14 @@ target triple = "riscv64"
define void @test1(ptr %a) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -39,15 +38,14 @@ exit: ; preds = %loop
define void @test2(ptr %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: ret void
@@ -72,19 +70,18 @@ exit: ; preds = %loop
define void @test3(ptr %a, ptr %b) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -110,19 +107,18 @@ exit: ; preds = %loop
define void @test4(ptr %a, ptr %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: call void @use(ptr [[B]])