[llvm] 8cb8cea - [ARM] Fixup of a few test cases. NFC.
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 9 03:15:46 PDT 2020
Author: Sjoerd Meijer
Date: 2020-09-09T11:14:44+01:00
New Revision: 8cb8cea1bd7f03330fc310b8993a3be89da90c1d
URL: https://github.com/llvm/llvm-project/commit/8cb8cea1bd7f03330fc310b8993a3be89da90c1d
DIFF: https://github.com/llvm/llvm-project/commit/8cb8cea1bd7f03330fc310b8993a3be89da90c1d.diff
LOG: [ARM] Fixup of a few test cases. NFC.
After changing the semantics of get.active.lane.mask, I missed a few tests
that should now use the trip count instead of the backedge-taken count.
Added:
Modified:
llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
llvm/test/CodeGen/Thumb2/active_lane_mask.ll
llvm/test/Verifier/get-active-lane-mask.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 0554742369fd..b5cac5d6a3cf 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -9,7 +9,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocaptur
; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.8 lr, r2
-; CHECK: .LBB0_2: @ %vector.body
+; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q1, [r1], #16
; CHECK-NEXT: vldrb.u8 q2, [r0], #16
@@ -75,7 +75,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB1_2: @ %vector.body
+; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
@@ -148,7 +148,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocaptur
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB2_2: @ %vector.body
+; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
@@ -218,7 +218,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocaptu
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB3_2: @ %vector.body
+; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
@@ -290,7 +290,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocaptur
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB4_2: @ %vector.body
+; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
@@ -360,7 +360,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocaptu
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB5_2: @ %vector.body
+; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
@@ -432,7 +432,7 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB6_2: @ %vector.body
+; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: vmov q0, q1
@@ -454,7 +454,7 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: vmov.32 q0[0], r12
-; CHECK: .LBB6_5: @ %vector.body46
+; CHECK-NEXT: .LBB6_5: @ %vector.body46
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: vmov q1, q0
@@ -559,7 +559,7 @@ define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocaptur
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: dls lr, lr
-; CHECK: .LBB7_2: @ %vector.body
+; CHECK-NEXT: .LBB7_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
@@ -670,32 +670,31 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: cmp r1, r2
; CHECK-NEXT: cset r4, lo
; CHECK-NEXT: .LBB8_4: @ %lor.end
-; CHECK-NEXT: ldr.w r3, [r12, #4]
-; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: ldr.w r1, [r12, #4]
+; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r4, pc}
; CHECK-NEXT: .LBB8_5: @ %vector.ph
-; CHECK-NEXT: adds r1, r3, #3
+; CHECK-NEXT: adds r3, r1, #3
; CHECK-NEXT: movs r2, #1
-; CHECK-NEXT: bic r1, r1, #3
-; CHECK-NEXT: subs r1, #4
-; CHECK-NEXT: add.w lr, r2, r1, lsr #2
-; CHECK-NEXT: movw r1, :lower16:days
-; CHECK-NEXT: movt r1, :upper16:days
-; CHECK-NEXT: movs r2, #52
-; CHECK-NEXT: mla r1, r4, r2, r1
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: vdup.32 q0, r2
+; CHECK-NEXT: bic r3, r3, #3
+; CHECK-NEXT: subs r3, #4
+; CHECK-NEXT: add.w lr, r2, r3, lsr #2
+; CHECK-NEXT: movw r2, :lower16:days
+; CHECK-NEXT: movt r2, :upper16:days
+; CHECK-NEXT: movs r3, #52
+; CHECK-NEXT: mla r2, r4, r3, r2
+; CHECK-NEXT: movs r3, #0
+; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vmov.32 q0[0], r0
-; CHECK-NEXT: subs r0, r3, #1
-; CHECK: .LBB8_6: @ %vector.body
+; CHECK-NEXT: .LBB8_6: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrwt.u32 q0, [r1], #16
-; CHECK-NEXT: subs r0, #4
+; CHECK-NEXT: vldrwt.u32 q0, [r2], #16
+; CHECK-NEXT: subs r1, #4
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB8_6
; CHECK-NEXT: @ %bb.7: @ %middle.block
@@ -738,7 +737,7 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = getelementptr inbounds [2 x [13 x i32]], [2 x [13 x i32]]* @days, i32 0, i32 %3, i32 %index
- %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %trip.count.minus.1)
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %4)
%7 = bitcast i32* %6 to <4 x i32>*
%wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
%8 = add <4 x i32> %wide.masked.load, %vec.phi
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
index 5b2f3a7c98e8..98d48d49539c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
@@ -10,7 +10,6 @@ define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
-; CHECK-NEXT: subs r3, #1
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -36,7 +35,7 @@ vector.body: ; preds = %vector.body, %vecto
%next.gep = getelementptr i16, i16* %pSrcA, i32 %index
%next.gep20 = getelementptr i16, i16* %pDst, i32 %index
%next.gep21 = getelementptr i16, i16* %pSrcB, i32 %index
- %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %trip.count.minus.1)
+ %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %blockSize)
%0 = bitcast i16* %next.gep to <8 x i16>*
%wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%1 = bitcast i16* %next.gep21 to <8 x i16>*
@@ -61,7 +60,6 @@ define arm_aapcs_vfpcc void @ssub_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.ph
-; CHECK-NEXT: subs r3, #1
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -87,7 +85,7 @@ vector.body: ; preds = %vector.body, %vecto
%next.gep = getelementptr i16, i16* %pSrcA, i32 %index
%next.gep20 = getelementptr i16, i16* %pDst, i32 %index
%next.gep21 = getelementptr i16, i16* %pSrcB, i32 %index
- %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %trip.count.minus.1)
+ %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %blockSize)
%0 = bitcast i16* %next.gep to <8 x i16>*
%wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
%1 = bitcast i16* %next.gep21 to <8 x i16>*
diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index 116031cb895f..2a5d32013d47 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -o - | FileCheck %s
-define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) {
+define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) {
; CHECK-LABEL: v4i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: adr.w r12, .LCPI0_0
@@ -28,12 +28,12 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) {
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 2 @ 0x2
; CHECK-NEXT: .long 3 @ 0x3
- %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %BTC)
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
%select = select <4 x i1> %active.lane.mask, <4 x i32> %V1, <4 x i32> %V2
ret <4 x i32> %select
}
-define <7 x i32> @v7i32(i32 %index, i32 %BTC, <7 x i32> %V1, <7 x i32> %V2) {
+define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
; CHECK-LABEL: v7i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: adr r3, .LCPI1_0
@@ -105,12 +105,12 @@ define <7 x i32> @v7i32(i32 %index, i32 %BTC, <7 x i32> %V1, <7 x i32> %V2) {
; CHECK-NEXT: .long 5 @ 0x5
; CHECK-NEXT: .long 6 @ 0x6
; CHECK-NEXT: .zero 4
- %active.lane.mask = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32 %index, i32 %BTC)
+ %active.lane.mask = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32 %index, i32 %TC)
%select = select <7 x i1> %active.lane.mask, <7 x i32> %V1, <7 x i32> %V2
ret <7 x i32> %select
}
-define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
+define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-LABEL: v8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
@@ -189,12 +189,12 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-NEXT: .long 5 @ 0x5
; CHECK-NEXT: .long 6 @ 0x6
; CHECK-NEXT: .long 7 @ 0x7
- %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %BTC)
+ %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
%select = select <8 x i1> %active.lane.mask, <8 x i16> %V1, <8 x i16> %V2
ret <8 x i16> %select
}
-define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
+define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-LABEL: v16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
@@ -405,7 +405,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: .long 13 @ 0xd
; CHECK-NEXT: .long 14 @ 0xe
; CHECK-NEXT: .long 15 @ 0xf
- %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %BTC)
+ %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
%select = select <16 x i1> %active.lane.mask, <16 x i8> %V1, <16 x i8> %V2
ret <16 x i8> %select
}
diff --git a/llvm/test/Verifier/get-active-lane-mask.ll b/llvm/test/Verifier/get-active-lane-mask.ll
index 94d819b5c75b..c637916faccf 100644
--- a/llvm/test/Verifier/get-active-lane-mask.ll
+++ b/llvm/test/Verifier/get-active-lane-mask.ll
@@ -2,20 +2,20 @@
declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32)
-define <4 x i32> @t1(i32 %IV, i32 %BTC) {
+define <4 x i32> @t1(i32 %IV, i32 %TC) {
; CHECK: get_active_lane_mask: element type is not i1
-; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %TC)
- %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+ %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %TC)
ret <4 x i32> %res
}
declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32)
-define i32 @t2(i32 %IV, i32 %BTC) {
+define i32 @t2(i32 %IV, i32 %TC) {
; CHECK: Intrinsic has incorrect return type!
; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32
- %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC)
+ %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %TC)
ret i32 %res
}
More information about the llvm-commits mailing list