[llvm] ba3fa39 - [EarlyCSE] Re-generate checks for intrinsics.ll.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 6 04:20:47 PDT 2025
Author: Florian Hahn
Date: 2025-04-06T12:20:31+01:00
New Revision: ba3fa39b63d7185f7d067f35a39c2fea40ee8861
URL: https://github.com/llvm/llvm-project/commit/ba3fa39b63d7185f7d067f35a39c2fea40ee8861
DIFF: https://github.com/llvm/llvm-project/commit/ba3fa39b63d7185f7d067f35a39c2fea40ee8861.diff
LOG: [EarlyCSE] Re-generate checks for intrinsics.ll.
Added:
Modified:
llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
index 69a503176aedf..94b17510bb95d 100644
--- a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
+++ b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
@@ -1,11 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse -earlycse-debug-hash | FileCheck %s
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' | FileCheck %s
define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_cse(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT: [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that @llvm.aarch64.neon.ld2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse
-; CHECK-NOT: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -34,11 +56,32 @@ for.end: ; preds = %for.cond
}
define <4 x i32> @test_cse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_cse2(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT: [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that the first @llvm.aarch64.neon.st2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse2
-; CHECK-NOT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %3, <4 x i32> %3, ptr %0)
-; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %s.coerce.fca.0.extract, <4 x i32> %s.coerce.fca.1.extract, ptr %a)
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -68,11 +111,26 @@ for.end: ; preds = %for.cond
}
define <4 x i32> @test_cse3(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) #0 {
+; CHECK-LABEL: define <4 x i32> @test_cse3(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD2_FCA_0_EXTRACT]], <4 x i32> [[VLD2_FCA_0_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that the first @llvm.aarch64.neon.ld2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse3
-; CHECK: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
-; CHECK-NOT: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -100,11 +158,33 @@ for.end: ; preds = %for.cond
define <4 x i32> @test_nocse(ptr %a, ptr %b, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT: [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: store i32 0, ptr [[B]], align 4
+; CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD2_FCA_0_EXTRACT]], <4 x i32> [[VLD2_FCA_0_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that the store prevents @llvm.aarch64.neon.ld2 from being optimized
; away by Early CSE.
-; CHECK-LABEL: @test_nocse
-; CHECK: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -134,11 +214,33 @@ for.end: ; preds = %for.cond
}
define <4 x i32> @test_nocse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse2(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT: [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+; CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 2
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD3_FCA_0_EXTRACT]], <4 x i32> [[VLD3_FCA_2_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that @llvm.aarch64.neon.ld3 is not optimized away by Early CSE due
; to mismatch between st2 and ld3.
-; CHECK-LABEL: @test_nocse2
-; CHECK: call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -167,12 +269,33 @@ for.end: ; preds = %for.cond
}
define <4 x i32> @test_nocse3(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse3(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT: [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT: [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[S_COERCE_FCA_1_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+; CHECK-NEXT: [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD3_FCA_0_EXTRACT]], <4 x i32> [[VLD3_FCA_0_EXTRACT]])
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret <4 x i32> [[RES_0]]
+;
entry:
; Check that @llvm.aarch64.neon.st3 is not optimized away by Early CSE due to
; mismatch between st2 and st3.
-; CHECK-LABEL: @test_nocse3
-; CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0
-; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0
%s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
%s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
br label %for.cond
@@ -214,6 +337,12 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr)
declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr)
define internal fastcc <4 x i32> @vaddq_s32(<4 x i32> %__p0, <4 x i32> %__p1) {
+; CHECK-LABEL: define internal fastcc <4 x i32> @vaddq_s32(
+; CHECK-SAME: <4 x i32> [[__P0:%.*]], <4 x i32> [[__P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[__P0]], [[__P1]]
+; CHECK-NEXT: ret <4 x i32> [[ADD]]
+;
entry:
%add = add <4 x i32> %__p0, %__p1
ret <4 x i32> %add
More information about the llvm-commits mailing list