[llvm] a9ac22b - [LV] Add users for loads to make tests more robust.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 4 12:43:49 PST 2023
Author: Florian Hahn
Date: 2023-02-04T20:42:57Z
New Revision: a9ac22b501c5fd03a07a86ab65d675c84d3aa3cb
URL: https://github.com/llvm/llvm-project/commit/a9ac22b501c5fd03a07a86ab65d675c84d3aa3cb
DIFF: https://github.com/llvm/llvm-project/commit/a9ac22b501c5fd03a07a86ab65d675c84d3aa3cb.diff
LOG: [LV] Add users for loads to make tests more robust.
Update a few tests to add users for loads, to prevent the loads from
being optimized out by future changes. In cases where the unused loads
didn't matter for the test, remove them instead.
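For illustration, a minimal sketch of the two patterns the updated tests
use to keep a load alive (the function names and types here are
illustrative, not taken from the commit): store the loaded value through
a separate noalias pointer, or return it from the function.

define void @keep_load_via_store(ptr %a, ptr noalias %b) {
entry:
  %v = load double, ptr %a, align 8   ; load now has a user below
  store double %v, ptr %b, align 8    ; user that prevents DCE of the load
  ret void
}

define ptr @keep_load_via_return(ptr %p) {
entry:
  %v = load ptr, ptr %p, align 8      ; result escapes via the return
  ret ptr %v
}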
Added:
Modified:
llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index e718bda0ad3b1..db97dc6796738 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
; the loop, preventing the gep (and consequently the loop induction
; update variable) from being classified as 'uniform'.
-define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
+define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 {
; CHECK-LABEL: @test_no_scalarization(
; CHECK-NEXT: L.entry:
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[IDX:%.*]], 1
@@ -39,21 +39,26 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP14]], align 8
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0
+; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP17]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
-; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 [[TMP20]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2
+; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 [[TMP23]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -62,12 +67,14 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
; CHECK: L.LoopBody:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[L_LOOPBODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i32 [[INDVAR]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
-; CHECK-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8
-; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[TMP24]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
+; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i64, ptr [[B]], i32 [[INDVAR]]
+; CHECK-NEXT: store double [[TMP26]], ptr [[GEP_B]], align 8
+; CHECK-NEXT: [[TMP27:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: L.exit:
-; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP22]], [[L_LOOPBODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP25]], [[L_LOOPBODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i64 1, ptr [[DOTLCSSA]], align 8
; CHECK-NEXT: ret void
;
@@ -79,6 +86,8 @@ L.LoopBody: ; preds = %L.LoopBody, %L.entr
%indvar.next = add nsw i32 %indvar, 1
%0 = getelementptr i64, ptr %a, i32 %indvar
%1 = load double, ptr %0, align 8
+ %gep.b = getelementptr i64, ptr %b, i32 %indvar
+ store double %1, ptr %gep.b
%2 = icmp slt i32 %indvar.next, %n
br i1 %2, label %L.LoopBody, label %L.exit
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
index b77ac10925e42..e919891874935 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
%0 = type { i32 }
%1 = type { i64 }
-define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
+define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
@@ -40,6 +40,7 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -50,9 +51,10 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, ptr [[P2]], i64 128
; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P2]], align 8
; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]]
-; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret ptr [[V_LCSSA]]
;
entry:
br label %loop
@@ -65,10 +67,10 @@ loop:
br i1 %b, label %exit, label %loop
exit:
- ret void
+ ret ptr %v
}
-define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
+define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
@@ -101,6 +103,7 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -113,7 +116,8 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]]
; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret ptr [[V_LCSSA]]
;
entry:
br label %loop
@@ -126,7 +130,7 @@ loop:
br i1 %b, label %exit, label %loop
exit:
- ret void
+ ret ptr %v
}
attributes #0 = { "target-cpu"="skylake" }
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 225b1c01294fe..85972114f8b3b 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -475,9 +475,8 @@ define void @test_first_order_recurrences_and_induction(ptr %ptr) {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
@@ -497,7 +496,6 @@ loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv
- %for.1.next = load i64, ptr %gep.ptr, align 2
%add.1 = add i64 %for.1, 10
store i64 %add.1, ptr %gep.ptr
%exitcond.not = icmp eq i64 %iv.next, 1000
@@ -518,9 +516,8 @@ define void @test_first_order_recurrences_and_induction2(ptr %ptr) {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
@@ -540,7 +537,6 @@ loop:
%for.1 = phi i64 [ 22, %entry ], [ %iv, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv
- %for.1.next = load i64, ptr %gep.ptr, align 2
%add.1 = add i64 %for.1, 10
store i64 %add.1, ptr %gep.ptr
%exitcond.not = icmp eq i64 %iv.next, 1000