[llvm] 2d6e7ef - [LV] Add additional tests for replicating load/store costs.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 11:17:18 PDT 2025
Author: Florian Hahn
Date: 2025-10-01T19:15:19+01:00
New Revision: 2d6e7ef567a80b887904221c4eb1320b4d5684b9
URL: https://github.com/llvm/llvm-project/commit/2d6e7ef567a80b887904221c4eb1320b4d5684b9
DIFF: https://github.com/llvm/llvm-project/commit/2d6e7ef567a80b887904221c4eb1320b4d5684b9.diff
LOG: [LV] Add additional tests for replicating load/store costs.
Includes test for https://github.com/llvm/llvm-project/issues/161404
Added:
llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
Modified:
llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index c15e8d4252fba..ab9b48fb68f6b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -616,6 +616,45 @@ exit:
ret double %red.next
}
+define i32 @test_ptr_iv_load_used_by_other_load(ptr %start, ptr %end) {
+; CHECK-LABEL: define i32 @test_ptr_iv_load_used_by_other_load(
+; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ null, %[[ENTRY]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IV]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[TMP1]], 0
+; CHECK-NEXT: [[C_EXT:%.*]] = zext i1 [[C]] to i32
+; CHECK-NEXT: [[RED_NEXT]] = or i32 [[RED]], [[C_EXT]]
+; CHECK-NEXT: [[IV_NEXT]] = getelementptr nusw i8, ptr [[IV]], i64 32
+; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[END]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ [[RED]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[RED_LCSSA]]
+;
+entry: ; OR-reduction over bytes reached through a pointer induction variable; the autogenerated assertions above expect the loop to stay scalar
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi ptr [ %iv.next, %loop ], [ null, %entry ] ; pointer induction variable; starts at null (%start is not referenced in the body)
+ %red = phi i32 [ %red.next, %loop ], [ 0, %entry ] ; running reduction value, initially 0
+ %0 = load ptr, ptr %iv, align 8 ; load a pointer from the address held by the pointer IV
+ %1 = load i8, ptr %0, align 8 ; the pointer just loaded is the address of this second load
+ %c = icmp ne i8 %1, 0
+ %c.ext = zext i1 %c to i32
+ %red.next = or i32 %red, %c.ext ; fold "loaded byte was non-zero" into the reduction
+ %iv.next = getelementptr nusw i8, ptr %iv, i64 32 ; advance the pointer IV by 32 bytes
+ %ec = icmp eq ptr %iv, %end ; exit test compares the pre-increment %iv against %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %red ; returns the phi %red, not %red.next
+}
+
attributes #0 = { "target-cpu"="neoverse-512tvb" }
!0 = !{!1, !2, i64 0}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
new file mode 100644
index 0000000000000..d93932585460f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -p loop-vectorize -mtriple=x86_64-linux-gnu -S %s | FileCheck --check-prefix=I64 %s
+; RUN: opt -p loop-vectorize -mtriple=i386-pc-linux-gnu -S %s | FileCheck --check-prefix=I32 %s
+
+
+define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
+; I64-LABEL: define void @test_store_initially_interleave(
+; I64-SAME: i32 [[N:%.*]], ptr noalias [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; I64-NEXT: [[ENTRY:.*]]:
+; I64-NEXT: br label %[[LOOP:.*]]
+; I64: [[LOOP]]:
+; I64-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; I64-NEXT: [[CONV:%.*]] = uitofp i32 [[IV]] to double
+; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
+; I64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADD_PTR_I]], align 4
+; I64-NEXT: store double [[CONV]], ptr [[TMP0]], align 4
+; I64-NEXT: [[INC]] = add i32 [[IV]], 1
+; I64-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; I64-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; I64: [[EXIT]]:
+; I64-NEXT: ret void
+;
+; I32-LABEL: define void @test_store_initially_interleave(
+; I32-SAME: i32 [[N:%.*]], ptr noalias [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; I32-NEXT: [[ENTRY:.*:]]
+; I32-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
+; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 8
+; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I32: [[VECTOR_PH]]:
+; I32-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8
+; I32-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
+; I32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 8, i32 [[N_MOD_VF]]
+; I32-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
+; I32-NEXT: br label %[[VECTOR_BODY:.*]]
+; I32: [[VECTOR_BODY]]:
+; I32-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I32-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I32-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; I32-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
+; I32-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
+; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
+; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
+; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
+; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
+; I32-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 4
+; I32-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 5
+; I32-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 6
+; I32-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 7
+; I32-NEXT: [[TMP11:%.*]] = uitofp <2 x i32> [[VEC_IND]] to <2 x double>
+; I32-NEXT: [[TMP12:%.*]] = uitofp <2 x i32> [[STEP_ADD]] to <2 x double>
+; I32-NEXT: [[TMP13:%.*]] = uitofp <2 x i32> [[STEP_ADD_2]] to <2 x double>
+; I32-NEXT: [[TMP14:%.*]] = uitofp <2 x i32> [[STEP_ADD_3]] to <2 x double>
+; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
+; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
+; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
+; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
+; I32-NEXT: [[TMP19:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]]
+; I32-NEXT: [[TMP20:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP8]]
+; I32-NEXT: [[TMP21:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP9]]
+; I32-NEXT: [[TMP22:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP10]]
+; I32-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP15]], align 4
+; I32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP16]], align 4
+; I32-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP17]], align 4
+; I32-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP18]], align 4
+; I32-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP19]], align 4
+; I32-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP20]], align 4
+; I32-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP21]], align 4
+; I32-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP22]], align 4
+; I32-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; I32-NEXT: store double [[TMP31]], ptr [[TMP23]], align 4
+; I32-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
+; I32-NEXT: store double [[TMP32]], ptr [[TMP24]], align 4
+; I32-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
+; I32-NEXT: store double [[TMP33]], ptr [[TMP25]], align 4
+; I32-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
+; I32-NEXT: store double [[TMP34]], ptr [[TMP26]], align 4
+; I32-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP13]], i32 0
+; I32-NEXT: store double [[TMP35]], ptr [[TMP27]], align 4
+; I32-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP13]], i32 1
+; I32-NEXT: store double [[TMP36]], ptr [[TMP28]], align 4
+; I32-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP14]], i32 0
+; I32-NEXT: store double [[TMP37]], ptr [[TMP29]], align 4
+; I32-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[TMP14]], i32 1
+; I32-NEXT: store double [[TMP38]], ptr [[TMP30]], align 4
+; I32-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; I32-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD_3]], splat (i32 2)
+; I32-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; I32-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; I32: [[MIDDLE_BLOCK]]:
+; I32-NEXT: br label %[[SCALAR_PH]]
+; I32: [[SCALAR_PH]]:
+;
+entry: ; the x86_64 assertions above expect only the scalar loop; the i386 assertions expect a vector body covering 8 iterations (four <2 x i32> parts) with one scalar pointer load and one scalar store per iteration
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %conv = uitofp i32 %iv to double ; value to store: the induction variable converted to double
+ %add.ptr.i = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 %iv ; address of element %iv of a { ptr, ptr, ptr } array based at null (%src is not used)
+ %0 = load ptr, ptr %add.ptr.i, align 4 ; load the pointer that the store below writes through
+ store double %conv, ptr %0, align 4 ; store address comes from the load above, not from an induction expression
+ %inc = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, %n ; exit test compares the pre-increment %iv against %n
+ br i1 %ec, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) #0 {
+; I64-LABEL: define void @test_store_loaded_value(
+; I64-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; I64-NEXT: [[BB:.*:]]
+; I64-NEXT: [[PRE:%.*]] = icmp slt i32 [[N]], 1
+; I64-NEXT: br i1 [[PRE]], [[EXIT:label %.*]], label %[[PH:.*]]
+; I64: [[PH]]:
+; I64-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
+; I64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N_EXT]], 4
+; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_EXT]], 4
+; I64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_EXT]], [[N_MOD_VF]]
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
+; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
+; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
+; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
+; I64-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
+; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
+; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
+; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
+; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
+; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
+; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
+; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
+; I64-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
+; I64-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
+; I64-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
+; I64-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
+; I64-NEXT: store double [[TMP8]], ptr [[TMP16]], align 8
+; I64-NEXT: store double [[TMP9]], ptr [[TMP17]], align 8
+; I64-NEXT: store double [[TMP10]], ptr [[TMP18]], align 8
+; I64-NEXT: store double [[TMP11]], ptr [[TMP19]], align 8
+; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; I64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; I64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_EXT]], [[N_VEC]]
+; I64-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
+; I64: [[SCALAR_PH]]:
+;
+; I32-LABEL: define void @test_store_loaded_value(
+; I32-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; I32-NEXT: [[BB:.*:]]
+; I32-NEXT: [[PRE:%.*]] = icmp slt i32 [[N]], 1
+; I32-NEXT: br i1 [[PRE]], [[EXIT:label %.*]], label %[[PH:.*]]
+; I32: [[PH]]:
+; I32-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
+; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N_EXT]], 4
+; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I32: [[VECTOR_PH]]:
+; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_EXT]], 4
+; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_EXT]], [[N_MOD_VF]]
+; I32-NEXT: br label %[[VECTOR_BODY:.*]]
+; I32: [[VECTOR_BODY]]:
+; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
+; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
+; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
+; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
+; I32-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
+; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
+; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
+; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
+; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
+; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
+; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
+; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
+; I32-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
+; I32-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
+; I32-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
+; I32-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
+; I32-NEXT: store double [[TMP8]], ptr [[TMP16]], align 8
+; I32-NEXT: store double [[TMP9]], ptr [[TMP17]], align 8
+; I32-NEXT: store double [[TMP10]], ptr [[TMP18]], align 8
+; I32-NEXT: store double [[TMP11]], ptr [[TMP19]], align 8
+; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; I32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; I32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; I32: [[MIDDLE_BLOCK]]:
+; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_EXT]], [[N_VEC]]
+; I32-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
+; I32: [[SCALAR_PH]]:
+;
+bb: ; copies a loaded double from %src to %dst; the assertions above expect, for both triples, a loop body handling 4 iterations with scalar loads and stores only
+ %pre = icmp slt i32 %n, 1 ; true when %n < 1 (signed)
+ br i1 %pre, label %exit, label %ph ; skip the loop entirely in that case
+
+ph:
+ %n.ext = zext i32 %n to i64 ; loop bound widened to i64
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+ %iv.next = add i64 %iv, 1
+ %gep.src = getelementptr i8, ptr %src, i64 %iv ; byte-indexed: the source address moves by 1 byte per iteration
+ %l = load double, ptr %gep.src, align 8 ; loaded value is stored unchanged below
+ %sext = shl i64 %iv, 1 ; despite the name this is a left shift: destination byte offset is 2 * %iv
+ %gep.dst = getelementptr i8, ptr %dst, i64 %sext
+ store double %l, ptr %gep.dst, align 8
+ %ec = icmp eq i64 %iv.next, %n.ext ; exit once the incremented IV reaches the widened bound
+ br i1 %ec, label %exit, label %loop, !llvm.loop !0 ; !0 carries llvm.loop.vectorize.enable = true
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="znver2" } ; attribute group referenced as #0 by both functions in this file
+
+!0 = distinct !{!0, !1} ; loop metadata node attached to the latch branch of @test_store_loaded_value
+!1 = !{!"llvm.loop.vectorize.enable", i1 true} ; sets llvm.loop.vectorize.enable to true for that loop
More information about the llvm-commits
mailing list