[llvm] 197fb27 - [AArch64][NFC] NFC for const vector as Instruction operand (#116790)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 20:53:08 PST 2024
Author: Sushant Gokhale
Date: 2024-11-21T10:23:05+05:30
New Revision: 197fb270cc2f947bdde047d9aac65b653f4f6f26
URL: https://github.com/llvm/llvm-project/commit/197fb270cc2f947bdde047d9aac65b653f4f6f26
DIFF: https://github.com/llvm/llvm-project/commit/197fb270cc2f947bdde047d9aac65b653f4f6f26.diff
LOG: [AArch64][NFC] NFC for const vector as Instruction operand (#116790)
The current cost model does not take into account the cost of materializing
a constant vector. As a result, some cases, as the test shows, are
vectorized even though this may not always be profitable. A future patch
will try to address this issue.
Added:
llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll
new file mode 100644
index 00000000000000..2f58bd25b75647
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64 -S %s | FileCheck %s %}
+
+define <2 x float> @v2f32_diff_consts(float %a, float %b)
+; CHECK-LABEL: define <2 x float> @v2f32_diff_consts(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], <float 2.200000e+01, float 2.300000e+01>
+; CHECK-NEXT: ret <2 x float> [[TMP3]]
+;
+{
+ %1 = fmul float %a, 22.0
+ %2 = fmul float %b, 23.0
+ %3 = insertelement <2 x float> poison, float %1, i32 0
+ %4 = insertelement <2 x float> %3, float %2, i32 1
+ ret <2 x float> %4
+}
+
+define <2 x float> @v2f32_const_splat(float %a, float %b)
+; CHECK-LABEL: define <2 x float> @v2f32_const_splat(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], splat (float 2.200000e+01)
+; CHECK-NEXT: ret <2 x float> [[TMP3]]
+;
+{
+ %1 = fmul float %a, 22.0
+ %2 = fmul float %b, 22.0
+ %3 = insertelement <2 x float> poison, float %1, i32 0
+ %4 = insertelement <2 x float> %3, float %2, i32 1
+ ret <2 x float> %4
+}
+
+define <4 x double> @v4f64_illegal_type(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <4 x double> @v4f64_illegal_type(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[C]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[D]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], <double 2.100000e+01, double 2.200000e+01, double 2.300000e+01, double 2.400000e+01>
+; CHECK-NEXT: ret <4 x double> [[TMP5]]
+;
+{
+ %1 = fmul double %a, 21.0
+ %2 = fmul double %b, 22.0
+ %3 = fmul double %c, 23.0
+ %4 = fmul double %d, 24.0
+ %5 = insertelement <4 x double> poison, double %1, i32 0
+ %6 = insertelement <4 x double> %5, double %2, i32 1
+ %7 = insertelement <4 x double> %6, double %3, i32 2
+ %8 = insertelement <4 x double> %7, double %4, i32 3
+ ret <4 x double> %8
+}
+
+define <2 x double> @v2f64_dup_const_vector_case1(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case1(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]]
+; CHECK-NEXT: ret <2 x double> [[TMP7]]
+;
+{
+ %1 = fmul double %a, 21.0
+ %2 = fmul double %b, 22.0
+ %3 = fmul double %c, 21.0
+ %4 = fmul double %d, 22.0
+ %5 = insertelement <2 x double> poison, double %1, i32 0
+ %6 = insertelement <2 x double> %5, double %2, i32 1
+ %7 = insertelement <2 x double> poison, double %3, i32 0
+ %8 = insertelement <2 x double> %7, double %4, i32 1
+ %9 = fadd <2 x double> %6, %8
+ ret <2 x double> %9
+}
+
+define <2 x double> @v2f64_dup_const_vector_case2(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case2(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+{
+ %1 = fmul double %a, 21.0
+ %2 = fmul double %b, 22.0
+ %3 = fadd double %1, 21.0
+ %4 = fadd double %2, 22.0
+ %5 = insertelement <2 x double> poison, double %3, i32 0
+ %6 = insertelement <2 x double> %5, double %4, i32 1
+ ret <2 x double> %6
+}
More information about the llvm-commits mailing list