[llvm] [AArch64][SLP] Add NFC test cases for floating point reductions (PR #106507)
Sushant Gokhale via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 10:36:06 PDT 2024
================
@@ -0,0 +1,838 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \
+; RUN: -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
+; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \
+; RUN: -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FP16
+
+; Adds the two lanes of a <2 x half> with a single `fadd fast`.
+; The shared CHECK prefix is used, so both RUN configurations (-fullfp16 and
+; +fullfp16) are expected to leave the scalar extract/fadd sequence unchanged.
+define half @reduce_fast_half2(<2 x half> %vec2) {
+; CHECK-LABEL: define half @reduce_fast_half2(
+; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret half [[ADD1]]
+entry:
+ %elt0 = extractelement <2 x half> %vec2, i64 0
+ %elt1 = extractelement <2 x half> %vec2, i64 1
+ %add1 = fadd fast half %elt1, %elt0
+ ret half %add1
+}
+
+; Same two-lane sum as the fast variant, but the fadd carries no fast-math
+; flags. The expected output for both RUN configurations is the unchanged
+; scalar extract/fadd sequence.
+define half @reduce_half2(<2 x half> %vec2) {
+; CHECK-LABEL: define half @reduce_half2(
+; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: ret half [[ADD1]]
+entry:
+ %elt0 = extractelement <2 x half> %vec2, i64 0
+ %elt1 = extractelement <2 x half> %vec2, i64 1
+ %add1 = fadd half %elt1, %elt0
+ ret half %add1
+}
+
+; Sums all four lanes of a <4 x half> through a linear chain of `fadd fast`.
+; For both RUN configurations the expected output replaces the whole chain
+; with one `fast` call to @llvm.vector.reduce.fadd.v4f16 on the input vector,
+; using half 0xH8000 as the start value.
+define half @reduce_fast_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_fast_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[VEC4]])
+; CHECK-NEXT: ret half [[TMP0]]
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %add1
+ %add3 = fadd fast half %elt3, %add2
+ ret half %add3
+}
+
+; Four-lane linear fadd chain without fast-math flags. Unlike the fast
+; variant, the expected output for both RUN configurations keeps the scalar
+; extractelement/fadd chain exactly as written (no reduction intrinsic).
+define half @reduce_half4(<4 x half> %vec4) {
+; CHECK-LABEL: define half @reduce_half4(
+; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
+; CHECK-NEXT: ret half [[ADD3]]
+entry:
+ %elt0 = extractelement <4 x half> %vec4, i64 0
+ %elt1 = extractelement <4 x half> %vec4, i64 1
+ %elt2 = extractelement <4 x half> %vec4, i64 2
+ %elt3 = extractelement <4 x half> %vec4, i64 3
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %add1
+ %add3 = fadd half %elt3, %add2
+ ret half %add3
+}
+
+; Sums all eight lanes of an <8 x half> through a linear chain of `fadd fast`.
+; The expected output (same for both RUN configurations) is only partially
+; vectorized: lanes 0-3 are selected with a shufflevector and reduced by
+; @llvm.vector.reduce.fadd.v4f16, while lanes 4-7 are still extracted
+; individually and folded in with scalar `fadd fast` instructions.
+define half @reduce_fast_half8(<8 x half> %vec8) {
+; CHECK-LABEL: define half @reduce_fast_half8(
+; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[TMP0]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]]
+; CHECK-NEXT: ret half [[OP_RDX3]]
+entry:
+ %elt0 = extractelement <8 x half> %vec8, i64 0
+ %elt1 = extractelement <8 x half> %vec8, i64 1
+ %elt2 = extractelement <8 x half> %vec8, i64 2
+ %elt3 = extractelement <8 x half> %vec8, i64 3
+ %elt4 = extractelement <8 x half> %vec8, i64 4
+ %elt5 = extractelement <8 x half> %vec8, i64 5
+ %elt6 = extractelement <8 x half> %vec8, i64 6
+ %elt7 = extractelement <8 x half> %vec8, i64 7
+ %add1 = fadd fast half %elt1, %elt0
+ %add2 = fadd fast half %elt2, %add1
+ %add3 = fadd fast half %elt3, %add2
+ %add4 = fadd fast half %elt4, %add3
+ %add5 = fadd fast half %elt5, %add4
+ %add6 = fadd fast half %elt6, %add5
+ %add7 = fadd fast half %elt7, %add6
+ ret half %add7
+}
+
+; Eight-lane linear fadd chain without fast-math flags. The expected output
+; for both RUN configurations is the original scalar extractelement/fadd
+; chain, unchanged (no shufflevector or reduction intrinsic is introduced).
+define half @reduce_half8(<8 x half> %vec8) {
+; CHECK-LABEL: define half @reduce_half8(
+; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
+; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
+; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
+; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
+; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
+; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
+; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
+; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
+; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
+; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
+; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
+; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
+; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
+; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
+; CHECK-NEXT: ret half [[ADD7]]
+entry:
+ %elt0 = extractelement <8 x half> %vec8, i64 0
+ %elt1 = extractelement <8 x half> %vec8, i64 1
+ %elt2 = extractelement <8 x half> %vec8, i64 2
+ %elt3 = extractelement <8 x half> %vec8, i64 3
+ %elt4 = extractelement <8 x half> %vec8, i64 4
+ %elt5 = extractelement <8 x half> %vec8, i64 5
+ %elt6 = extractelement <8 x half> %vec8, i64 6
+ %elt7 = extractelement <8 x half> %vec8, i64 7
+ %add1 = fadd half %elt1, %elt0
+ %add2 = fadd half %elt2, %add1
+ %add3 = fadd half %elt3, %add2
+ %add4 = fadd half %elt4, %add3
+ %add5 = fadd half %elt5, %add4
+ %add6 = fadd half %elt6, %add5
+ %add7 = fadd half %elt7, %add6
+ ret half %add7
+}
+
+define half @reduce_fast_half16(<16 x half> %vec16) {
+; NOFP16-LABEL: define half @reduce_fast_half16(
+; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
+; NOFP16-NEXT: [[ENTRY:.*:]]
+; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[VEC16]])
+; NOFP16-NEXT: ret half [[TMP0]]
----------------
sushgokh wrote:
Yes, right. I have been asked in review a few times to remove the last blank line generated by the update_test_checks script, and I removed them manually. I have run the test through llvm-lit and it passes. I hope this is fine.
https://github.com/llvm/llvm-project/pull/106507
More information about the llvm-commits
mailing list