[llvm] 893479a - [RISCV] Precommit test for unrolling loops with vector operands
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 19:52:07 PST 2025
Author: Pengcheng Wang
Date: 2025-12-09T11:51:33+08:00
New Revision: 893479adcc03e8ce0ed5e0b550cae7d32724cd9a
URL: https://github.com/llvm/llvm-project/commit/893479adcc03e8ce0ed5e0b550cae7d32724cd9a
DIFF: https://github.com/llvm/llvm-project/commit/893479adcc03e8ce0ed5e0b550cae7d32724cd9a.diff
LOG: [RISCV] Precommit test for unrolling loops with vector operands
Added:
Modified:
llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
index b575057ff6d15..e4b18ad6c705f 100644
--- a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-unroll -mtriple riscv64 -mattr=+v,+f -S %s | FileCheck %s --check-prefixes=COMMON,CHECK
-; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-s76 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
+; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-p870 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
define void @reverse(ptr %dst, ptr %src, i64 %len) {
; CHECK-LABEL: define void @reverse(
@@ -248,7 +248,7 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
; SIFIVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
; SIFIVE: [[VECTOR_BODY]]:
-; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT_15:%.*]], %[[VECTOR_BODY]] ]
; SIFIVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
; SIFIVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; SIFIVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
@@ -276,9 +276,93 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
; SIFIVE-NEXT: [[WIDE_LOAD12_3:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
; SIFIVE-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_3]], <4 x float> [[WIDE_LOAD12_3]])
; SIFIVE-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; SIFIVE-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 16
-; SIFIVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
+; SIFIVE-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
+; SIFIVE-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
+; SIFIVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_4:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[TMP14:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_4]], <4 x float> [[WIDE_LOAD12_4]])
+; SIFIVE-NEXT: store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX]], 20
+; SIFIVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
+; SIFIVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_5:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_5]], <4 x float> [[WIDE_LOAD12_5]])
+; SIFIVE-NEXT: store <4 x float> [[TMP17]], ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX]], 24
+; SIFIVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
+; SIFIVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_6:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[TMP20:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_6]], <4 x float> [[WIDE_LOAD12_6]])
+; SIFIVE-NEXT: store <4 x float> [[TMP20]], ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_6:%.*]] = add nuw nsw i64 [[INDEX]], 28
+; SIFIVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
+; SIFIVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_7]], <4 x float> [[WIDE_LOAD12_7]])
+; SIFIVE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX]], 32
+; SIFIVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP24]], align 4
+; SIFIVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_8:%.*]] = load <4 x float>, ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_8]], <4 x float> [[WIDE_LOAD12_8]])
+; SIFIVE-NEXT: store <4 x float> [[TMP26]], ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_8:%.*]] = add nuw nsw i64 [[INDEX]], 36
+; SIFIVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
+; SIFIVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_9:%.*]] = load <4 x float>, ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[TMP29:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_9]], <4 x float> [[WIDE_LOAD12_9]])
+; SIFIVE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_9:%.*]] = add nuw nsw i64 [[INDEX]], 40
+; SIFIVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x float>, ptr [[TMP30]], align 4
+; SIFIVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_10:%.*]] = load <4 x float>, ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_10]], <4 x float> [[WIDE_LOAD12_10]])
+; SIFIVE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_10:%.*]] = add nuw nsw i64 [[INDEX]], 44
+; SIFIVE-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
+; SIFIVE-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[TMP35:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_11]], <4 x float> [[WIDE_LOAD12_11]])
+; SIFIVE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_11:%.*]] = add nuw nsw i64 [[INDEX]], 48
+; SIFIVE-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
+; SIFIVE-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_12:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[TMP38:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_12]], <4 x float> [[WIDE_LOAD12_12]])
+; SIFIVE-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_12:%.*]] = add nuw nsw i64 [[INDEX]], 52
+; SIFIVE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
+; SIFIVE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_13:%.*]] = load <4 x float>, ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[TMP41:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_13]], <4 x float> [[WIDE_LOAD12_13]])
+; SIFIVE-NEXT: store <4 x float> [[TMP41]], ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_13:%.*]] = add nuw nsw i64 [[INDEX]], 56
+; SIFIVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x float>, ptr [[TMP42]], align 4
+; SIFIVE-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_14:%.*]] = load <4 x float>, ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[TMP44:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_14]], <4 x float> [[WIDE_LOAD12_14]])
+; SIFIVE-NEXT: store <4 x float> [[TMP44]], ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_14:%.*]] = add nuw nsw i64 [[INDEX]], 60
+; SIFIVE-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x float>, ptr [[TMP45]], align 4
+; SIFIVE-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_15:%.*]] = load <4 x float>, ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[TMP47:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_15]], <4 x float> [[WIDE_LOAD12_15]])
+; SIFIVE-NEXT: store <4 x float> [[TMP47]], ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_15]] = add nuw nsw i64 [[INDEX]], 64
+; SIFIVE-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT_15]], 1024
+; SIFIVE-NEXT: br i1 [[TMP48]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
; SIFIVE: [[EXIT]]:
; SIFIVE-NEXT: ret void
;
@@ -345,8 +429,6 @@ vector.body: ; preds = %vector.body, %entry
exit: ; preds = %vector.body
ret void
}
-!0 = !{!0, !1}
-!1 = !{!"llvm.loop.isvectorized", i32 1}
; On SiFive we should runtime unroll the scalar epilogue loop, but not the
; vector loop.
@@ -587,6 +669,127 @@ exit:
ret void
}
+define void @vector_operands(ptr %p, i64 %n) {
+; CHECK-LABEL: define void @vector_operands(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; CHECK-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+; SIFIVE-LABEL: define void @vector_operands(
+; SIFIVE-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SIFIVE-NEXT: [[ENTRY:.*]]:
+; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
+; SIFIVE: [[VECTOR_BODY]]:
+; SIFIVE-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT_7:%.*]], %[[VECTOR_BODY_7:.*]] ]
+; SIFIVE-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT_7:%.*]], %[[VECTOR_BODY_7]] ]
+; SIFIVE-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; SIFIVE-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT:%.*]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; SIFIVE-NEXT: [[AVL_NEXT:%.*]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; SIFIVE-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; SIFIVE-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY_1:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_1]]:
+; SIFIVE-NEXT: [[VL_1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_1:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_1]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_1]])
+; SIFIVE-NEXT: [[VL_ZEXT_1:%.*]] = zext i32 [[VL_1]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_1:%.*]] = add nuw i64 [[VL_ZEXT_1]], [[INDEX_EVL_NEXT]]
+; SIFIVE-NEXT: [[AVL_NEXT_1:%.*]] = sub nuw i64 [[AVL_NEXT]], [[VL_ZEXT_1]]
+; SIFIVE-NEXT: [[TMP1:%.*]] = icmp eq i64 [[AVL_NEXT_1]], 0
+; SIFIVE-NEXT: br i1 [[TMP1]], label %[[EXIT]], label %[[VECTOR_BODY_2:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_2]]:
+; SIFIVE-NEXT: [[VL_2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_1]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_2:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_1]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_2]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_2]])
+; SIFIVE-NEXT: [[VL_ZEXT_2:%.*]] = zext i32 [[VL_2]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_2:%.*]] = add nuw i64 [[VL_ZEXT_2]], [[INDEX_EVL_NEXT_1]]
+; SIFIVE-NEXT: [[AVL_NEXT_2:%.*]] = sub nuw i64 [[AVL_NEXT_1]], [[VL_ZEXT_2]]
+; SIFIVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[AVL_NEXT_2]], 0
+; SIFIVE-NEXT: br i1 [[TMP2]], label %[[EXIT]], label %[[VECTOR_BODY_3:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_3]]:
+; SIFIVE-NEXT: [[VL_3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_2]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_3:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_2]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_3]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_3]])
+; SIFIVE-NEXT: [[VL_ZEXT_3:%.*]] = zext i32 [[VL_3]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_3:%.*]] = add nuw i64 [[VL_ZEXT_3]], [[INDEX_EVL_NEXT_2]]
+; SIFIVE-NEXT: [[AVL_NEXT_3:%.*]] = sub nuw i64 [[AVL_NEXT_2]], [[VL_ZEXT_3]]
+; SIFIVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[AVL_NEXT_3]], 0
+; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT]], label %[[VECTOR_BODY_4:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_4]]:
+; SIFIVE-NEXT: [[VL_4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_3]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_4:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_3]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_4]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_4]])
+; SIFIVE-NEXT: [[VL_ZEXT_4:%.*]] = zext i32 [[VL_4]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_4:%.*]] = add nuw i64 [[VL_ZEXT_4]], [[INDEX_EVL_NEXT_3]]
+; SIFIVE-NEXT: [[AVL_NEXT_4:%.*]] = sub nuw i64 [[AVL_NEXT_3]], [[VL_ZEXT_4]]
+; SIFIVE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[AVL_NEXT_4]], 0
+; SIFIVE-NEXT: br i1 [[TMP4]], label %[[EXIT]], label %[[VECTOR_BODY_5:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_5]]:
+; SIFIVE-NEXT: [[VL_5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_4]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_5:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_4]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_5]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_5]])
+; SIFIVE-NEXT: [[VL_ZEXT_5:%.*]] = zext i32 [[VL_5]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_5:%.*]] = add nuw i64 [[VL_ZEXT_5]], [[INDEX_EVL_NEXT_4]]
+; SIFIVE-NEXT: [[AVL_NEXT_5:%.*]] = sub nuw i64 [[AVL_NEXT_4]], [[VL_ZEXT_5]]
+; SIFIVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT_5]], 0
+; SIFIVE-NEXT: br i1 [[TMP5]], label %[[EXIT]], label %[[VECTOR_BODY_6:.*]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_6]]:
+; SIFIVE-NEXT: [[VL_6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_5]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_6:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_5]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_6]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_6]])
+; SIFIVE-NEXT: [[VL_ZEXT_6:%.*]] = zext i32 [[VL_6]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_6:%.*]] = add nuw i64 [[VL_ZEXT_6]], [[INDEX_EVL_NEXT_5]]
+; SIFIVE-NEXT: [[AVL_NEXT_6:%.*]] = sub nuw i64 [[AVL_NEXT_5]], [[VL_ZEXT_6]]
+; SIFIVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT_6]], 0
+; SIFIVE-NEXT: br i1 [[TMP6]], label %[[EXIT]], label %[[VECTOR_BODY_7]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[VECTOR_BODY_7]]:
+; SIFIVE-NEXT: [[VL_7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL_NEXT_6]], i32 2, i1 true)
+; SIFIVE-NEXT: [[ADDR_7:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX_EVL_NEXT_6]]
+; SIFIVE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR_7]], <vscale x 2 x i1> splat (i1 true), i32 [[VL_7]])
+; SIFIVE-NEXT: [[VL_ZEXT_7:%.*]] = zext i32 [[VL_7]] to i64
+; SIFIVE-NEXT: [[INDEX_EVL_NEXT_7]] = add nuw i64 [[VL_ZEXT_7]], [[INDEX_EVL_NEXT_6]]
+; SIFIVE-NEXT: [[AVL_NEXT_7]] = sub nuw i64 [[AVL_NEXT_6]], [[VL_ZEXT_7]]
+; SIFIVE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT_7]], 0
+; SIFIVE-NEXT: br i1 [[TMP7]], label %[[EXIT]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2]]
+; SIFIVE: [[EXIT]]:
+; SIFIVE-NEXT: ret void
+;
+entry:
+ br label %vector.body
+
+vector.body:
+ %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+ %avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
+ %vl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+ %addr = getelementptr i64, ptr %p, i64 %evl.based.iv
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> splat (i64 0), ptr align 8 %addr, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ %vl.zext = zext i32 %vl to i64
+ %index.evl.next = add nuw i64 %vl.zext, %evl.based.iv
+ %avl.next = sub nuw i64 %avl, %vl.zext
+ %0 = icmp eq i64 %avl.next, 0
+ br i1 %0, label %exit, label %vector.body, !llvm.loop !2
+
+exit:
+ ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !1}
!3 = distinct !{!3, !1}
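Note: the CHECK/SIFIVE assertions above are autogenerated (see the UTC_ARGS
note at the top of the test). A minimal sketch of regenerating them after
editing the test input, assuming a local build tree at ./build (the build
path is illustrative; --opt-binary is the utility's flag for pointing at a
non-default opt):

  llvm/utils/update_test_checks.py --opt-binary=./build/bin/opt \
      llvm/test/Transforms/LoopUnroll/RISCV/vector.ll

This re-runs each RUN line and rewrites the COMMON/CHECK/SIFIVE check lines
in place, which is how diffs like the one above are typically produced.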