[llvm] 08ea27b - [RISCV] Don't require loop simplify form in RISCVGatherScatterLowering.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 10 13:00:32 PDT 2022
Author: Craig Topper
Date: 2022-06-10T13:00:20-07:00
New Revision: 08ea27bf13e68afc805b31af27634103dd392c8c
URL: https://github.com/llvm/llvm-project/commit/08ea27bf13e68afc805b31af27634103dd392c8c
DIFF: https://github.com/llvm/llvm-project/commit/08ea27bf13e68afc805b31af27634103dd392c8c.diff
LOG: [RISCV] Don't require loop simplify form in RISCVGatherScatterLowering.
The pass needs a preheader and a single latch, but it does not need the
dedicated exit blocks that loop simplify form also guarantees.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D127513
Added:
Modified:
llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 7ee58484bb62..2410cc1f8859 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -338,9 +338,9 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
if (Ops[0]->getType()->isVectorTy())
return std::make_pair(nullptr, nullptr);
- // Make sure we're in a loop and it is in loop simplify form.
+ // Make sure we're in a loop that has a preheader and a single latch.
Loop *L = LI->getLoopFor(GEP->getParent());
- if (!L || !L->isLoopSimplifyForm())
+ if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
return std::make_pair(nullptr, nullptr);
Optional<unsigned> VecOperand;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
index 7dc28d7a5132..a8bb9196db59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
@@ -877,243 +877,25 @@ declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32 immarg, <16
declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32 immarg, <16 x i1>)
define void @gather_no_scalar_remainder(i8* noalias nocapture noundef %arg, i8* noalias nocapture noundef readonly %arg1, i64 noundef %arg2) {
-; V-LABEL: gather_no_scalar_remainder:
-; V: # %bb.0: # %bb
-; V-NEXT: slli a2, a2, 4
-; V-NEXT: beqz a2, .LBB13_3
-; V-NEXT: # %bb.1: # %bb2
-; V-NEXT: vsetivli zero, 16, e64, m4, ta, mu
-; V-NEXT: vid.v v8
-; V-NEXT: li a3, 5
-; V-NEXT: li a4, 16
-; V-NEXT: .LBB13_2: # %bb4
-; V-NEXT: # =>This Inner Loop Header: Depth=1
-; V-NEXT: vmul.vx v12, v8, a3
-; V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; V-NEXT: vluxei64.v v16, (a1), v12
-; V-NEXT: vle8.v v12, (a0)
-; V-NEXT: vadd.vv v12, v12, v16
-; V-NEXT: vse8.v v12, (a0)
-; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; V-NEXT: vadd.vx v8, v8, a4
-; V-NEXT: addi a2, a2, -16
-; V-NEXT: addi a0, a0, 16
-; V-NEXT: bnez a2, .LBB13_2
-; V-NEXT: .LBB13_3: # %bb16
-; V-NEXT: ret
-;
-; ZVE32F-LABEL: gather_no_scalar_remainder:
-; ZVE32F: # %bb.0: # %bb
-; ZVE32F-NEXT: addi sp, sp, -240
-; ZVE32F-NEXT: .cfi_def_cfa_offset 240
-; ZVE32F-NEXT: sd ra, 232(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s0, 224(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s1, 216(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s2, 208(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s3, 200(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s4, 192(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s5, 184(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s6, 176(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s7, 168(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s8, 160(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s9, 152(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s10, 144(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd s11, 136(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: .cfi_offset ra, -8
-; ZVE32F-NEXT: .cfi_offset s0, -16
-; ZVE32F-NEXT: .cfi_offset s1, -24
-; ZVE32F-NEXT: .cfi_offset s2, -32
-; ZVE32F-NEXT: .cfi_offset s3, -40
-; ZVE32F-NEXT: .cfi_offset s4, -48
-; ZVE32F-NEXT: .cfi_offset s5, -56
-; ZVE32F-NEXT: .cfi_offset s6, -64
-; ZVE32F-NEXT: .cfi_offset s7, -72
-; ZVE32F-NEXT: .cfi_offset s8, -80
-; ZVE32F-NEXT: .cfi_offset s9, -88
-; ZVE32F-NEXT: .cfi_offset s10, -96
-; ZVE32F-NEXT: .cfi_offset s11, -104
-; ZVE32F-NEXT: slli a2, a2, 4
-; ZVE32F-NEXT: beqz a2, .LBB13_3
-; ZVE32F-NEXT: # %bb.1: # %bb2
-; ZVE32F-NEXT: li a3, 0
-; ZVE32F-NEXT: li a4, 15
-; ZVE32F-NEXT: li a5, 14
-; ZVE32F-NEXT: li a6, 13
-; ZVE32F-NEXT: li a7, 12
-; ZVE32F-NEXT: li t0, 11
-; ZVE32F-NEXT: li t1, 10
-; ZVE32F-NEXT: li t2, 9
-; ZVE32F-NEXT: li t3, 8
-; ZVE32F-NEXT: li t4, 7
-; ZVE32F-NEXT: li t5, 6
-; ZVE32F-NEXT: li t6, 5
-; ZVE32F-NEXT: li s0, 4
-; ZVE32F-NEXT: li s1, 3
-; ZVE32F-NEXT: li s2, 2
-; ZVE32F-NEXT: li s3, 1
-; ZVE32F-NEXT: vsetivli zero, 16, e8, mf2, ta, mu
-; ZVE32F-NEXT: .LBB13_2: # %bb4
-; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT: sd s0, 56(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd t0, 64(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd a5, 72(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd a3, 88(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd a0, 96(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: sd a2, 104(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: slli s4, a3, 2
-; ZVE32F-NEXT: add a0, s4, a3
-; ZVE32F-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: slli s5, s3, 2
-; ZVE32F-NEXT: add a0, s5, s3
-; ZVE32F-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: slli s6, s2, 2
-; ZVE32F-NEXT: add a0, s6, s2
-; ZVE32F-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: slli s7, s1, 2
-; ZVE32F-NEXT: add a0, s7, s1
-; ZVE32F-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: slli s8, s0, 2
-; ZVE32F-NEXT: add a0, s8, s0
-; ZVE32F-NEXT: slli s9, t6, 2
-; ZVE32F-NEXT: add a3, s9, t6
-; ZVE32F-NEXT: slli s10, t5, 2
-; ZVE32F-NEXT: add s10, s10, t5
-; ZVE32F-NEXT: slli s11, t4, 2
-; ZVE32F-NEXT: add s11, s11, t4
-; ZVE32F-NEXT: slli ra, t3, 2
-; ZVE32F-NEXT: add ra, ra, t3
-; ZVE32F-NEXT: slli s4, t2, 2
-; ZVE32F-NEXT: add s4, s4, t2
-; ZVE32F-NEXT: slli s5, t1, 2
-; ZVE32F-NEXT: add s5, s5, t1
-; ZVE32F-NEXT: slli s6, t0, 2
-; ZVE32F-NEXT: add s6, s6, t0
-; ZVE32F-NEXT: slli s7, a7, 2
-; ZVE32F-NEXT: add s7, s7, a7
-; ZVE32F-NEXT: slli s8, a6, 2
-; ZVE32F-NEXT: add s8, s8, a6
-; ZVE32F-NEXT: slli a2, a5, 2
-; ZVE32F-NEXT: add a2, a2, a5
-; ZVE32F-NEXT: slli s9, a4, 2
-; ZVE32F-NEXT: add s9, s9, a4
-; ZVE32F-NEXT: add a4, a1, s9
-; ZVE32F-NEXT: sd a4, 16(sp) # 8-byte Folded Spill
-; ZVE32F-NEXT: add a2, a1, a2
-; ZVE32F-NEXT: add s8, a1, s8
-; ZVE32F-NEXT: add s7, a1, s7
-; ZVE32F-NEXT: add s6, a1, s6
-; ZVE32F-NEXT: add s5, a1, s5
-; ZVE32F-NEXT: add s4, a1, s4
-; ZVE32F-NEXT: add ra, a1, ra
-; ZVE32F-NEXT: add s11, a1, s11
-; ZVE32F-NEXT: add s10, a1, s10
-; ZVE32F-NEXT: add a3, a1, a3
-; ZVE32F-NEXT: add a0, a1, a0
-; ZVE32F-NEXT: ld a4, 24(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: add a4, a1, a4
-; ZVE32F-NEXT: ld a5, 32(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: add a5, a1, a5
-; ZVE32F-NEXT: mv t0, a6
-; ZVE32F-NEXT: ld a6, 40(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: add a6, a1, a6
-; ZVE32F-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: add s9, a1, s0
-; ZVE32F-NEXT: lb s9, 0(s9)
-; ZVE32F-NEXT: lb a6, 0(a6)
-; ZVE32F-NEXT: lb a5, 0(a5)
-; ZVE32F-NEXT: lb a4, 0(a4)
-; ZVE32F-NEXT: lb a0, 0(a0)
-; ZVE32F-NEXT: lb a3, 0(a3)
-; ZVE32F-NEXT: lb s10, 0(s10)
-; ZVE32F-NEXT: lb s11, 0(s11)
-; ZVE32F-NEXT: lb ra, 0(ra)
-; ZVE32F-NEXT: lb s4, 0(s4)
-; ZVE32F-NEXT: lb s5, 0(s5)
-; ZVE32F-NEXT: lb s6, 0(s6)
-; ZVE32F-NEXT: lb s7, 0(s7)
-; ZVE32F-NEXT: lb s8, 0(s8)
-; ZVE32F-NEXT: lb a2, 0(a2)
-; ZVE32F-NEXT: mv s0, t6
-; ZVE32F-NEXT: mv t6, t5
-; ZVE32F-NEXT: mv t5, t4
-; ZVE32F-NEXT: mv t4, t3
-; ZVE32F-NEXT: mv t3, t2
-; ZVE32F-NEXT: mv t2, t1
-; ZVE32F-NEXT: mv t1, a7
-; ZVE32F-NEXT: ld a7, 16(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: lb a7, 0(a7)
-; ZVE32F-NEXT: sb s9, 112(sp)
-; ZVE32F-NEXT: sb a6, 113(sp)
-; ZVE32F-NEXT: mv a6, t0
-; ZVE32F-NEXT: ld t0, 64(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb a5, 114(sp)
-; ZVE32F-NEXT: ld a5, 72(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb a4, 115(sp)
-; ZVE32F-NEXT: ld a4, 80(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb a0, 116(sp)
-; ZVE32F-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb a3, 117(sp)
-; ZVE32F-NEXT: ld a3, 88(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb s10, 118(sp)
-; ZVE32F-NEXT: sb s11, 119(sp)
-; ZVE32F-NEXT: sb ra, 120(sp)
-; ZVE32F-NEXT: sb s4, 121(sp)
-; ZVE32F-NEXT: sb s5, 122(sp)
-; ZVE32F-NEXT: sb s6, 123(sp)
-; ZVE32F-NEXT: sb s7, 124(sp)
-; ZVE32F-NEXT: sb s8, 125(sp)
-; ZVE32F-NEXT: sb a2, 126(sp)
-; ZVE32F-NEXT: ld a2, 104(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: sb a7, 127(sp)
-; ZVE32F-NEXT: mv a7, t1
-; ZVE32F-NEXT: mv t1, t2
-; ZVE32F-NEXT: mv t2, t3
-; ZVE32F-NEXT: mv t3, t4
-; ZVE32F-NEXT: mv t4, t5
-; ZVE32F-NEXT: mv t5, t6
-; ZVE32F-NEXT: mv t6, s0
-; ZVE32F-NEXT: ld s0, 56(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: addi s4, sp, 112
-; ZVE32F-NEXT: vle8.v v8, (s4)
-; ZVE32F-NEXT: vle8.v v9, (a0)
-; ZVE32F-NEXT: vadd.vv v8, v9, v8
-; ZVE32F-NEXT: vse8.v v8, (a0)
-; ZVE32F-NEXT: addi a3, a3, 16
-; ZVE32F-NEXT: addi s3, s3, 16
-; ZVE32F-NEXT: addi s2, s2, 16
-; ZVE32F-NEXT: addi s1, s1, 16
-; ZVE32F-NEXT: addi s0, s0, 16
-; ZVE32F-NEXT: addi t6, t6, 16
-; ZVE32F-NEXT: addi t5, t5, 16
-; ZVE32F-NEXT: addi t4, t4, 16
-; ZVE32F-NEXT: addi t3, t3, 16
-; ZVE32F-NEXT: addi t2, t2, 16
-; ZVE32F-NEXT: addi t1, t1, 16
-; ZVE32F-NEXT: addi t0, t0, 16
-; ZVE32F-NEXT: addi a7, a7, 16
-; ZVE32F-NEXT: addi a6, a6, 16
-; ZVE32F-NEXT: addi a5, a5, 16
-; ZVE32F-NEXT: addi a4, a4, 16
-; ZVE32F-NEXT: addi a2, a2, -16
-; ZVE32F-NEXT: addi a0, a0, 16
-; ZVE32F-NEXT: bnez a2, .LBB13_2
-; ZVE32F-NEXT: .LBB13_3: # %bb16
-; ZVE32F-NEXT: ld ra, 232(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s0, 224(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s1, 216(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s2, 208(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s3, 200(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s4, 192(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s5, 184(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s6, 176(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s7, 168(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s8, 160(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s9, 152(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s10, 144(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: ld s11, 136(sp) # 8-byte Folded Reload
-; ZVE32F-NEXT: addi sp, sp, 240
-; ZVE32F-NEXT: ret
+; CHECK-LABEL: gather_no_scalar_remainder:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: beqz a2, .LBB13_3
+; CHECK-NEXT: # %bb.1: # %bb2
+; CHECK-NEXT: li a3, 5
+; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, mu
+; CHECK-NEXT: .LBB13_2: # %bb4
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vlse8.v v8, (a1), a3
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: addi a2, a2, -16
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a1, a1, 80
+; CHECK-NEXT: bnez a2, .LBB13_2
+; CHECK-NEXT: .LBB13_3: # %bb16
+; CHECK-NEXT: ret
bb:
%i = shl i64 %arg2, 4
%i3 = icmp eq i64 %i, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
index fd8ff2675c73..126287e5304c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
@@ -867,10 +867,9 @@ define void @gather_no_scalar_remainder(i8* noalias nocapture noundef %arg, i8*
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb4:
; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ]
-; CHECK-NEXT: [[I6:%.*]] = phi <16 x i64> [ [[I14:%.*]], [[BB4]] ], [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BB2]] ]
-; CHECK-NEXT: [[I7:%.*]] = mul <16 x i64> [[I6]], <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT: [[I8:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], <16 x i64> [[I7]]
-; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> [[I8]], i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
+; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[ARG1:%.*]], i64 [[I6_SCALAR]]
+; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.riscv.masked.strided.load.v16i8.p0i8.i64(<16 x i8> undef, i8* [[TMP0]], i64 5, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, i8* [[ARG:%.*]], i64 [[I5]]
; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[I10]] to <16 x i8>*
; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, <16 x i8>* [[CAST]], align 1
@@ -878,7 +877,7 @@ define void @gather_no_scalar_remainder(i8* noalias nocapture noundef %arg, i8*
; CHECK-NEXT: [[CAST2:%.*]] = bitcast i8* [[I10]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[I12]], <16 x i8>* [[CAST2]], align 1
; CHECK-NEXT: [[I13]] = add nuw i64 [[I5]], 16
-; CHECK-NEXT: [[I14]] = add <16 x i64> [[I6]], <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT: [[I14_SCALAR]] = add i64 [[I6_SCALAR]], 80
; CHECK-NEXT: [[I15:%.*]] = icmp eq i64 [[I13]], [[I]]
; CHECK-NEXT: br i1 [[I15]], label [[BB16]], label [[BB4]]
; CHECK: bb16:
More information about the llvm-commits mailing list