[llvm] 197fb27 - [AArch64][NFC] NFC for const vector as Instruction operand (#116790)

Wed Nov 20 20:53:08 PST 2024

Author: Sushant Gokhale
Date: 2024-11-21T10:23:05+05:30
New Revision: 197fb270cc2f947bdde047d9aac65b653f4f6f26

URL: https://github.com/llvm/llvm-project/commit/197fb270cc2f947bdde047d9aac65b653f4f6f26
DIFF: https://github.com/llvm/llvm-project/commit/197fb270cc2f947bdde047d9aac65b653f4f6f26.diff

LOG: [AArch64][NFC] NFC for const vector as Instruction operand (#116790)

The current cost model does not take into account the cost of materializing
a constant vector. As a result, some cases, as the test shows, are
vectorized even though doing so may not always be profitable. A future patch
will try to address this issue.

Added: 
    llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll
new file mode 100644
index 00000000000000..2f58bd25b75647

--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64 -S %s | FileCheck %s %}
+
+define <2 x float> @v2f32_diff_consts(float %a, float %b)
+; CHECK-LABEL: define <2 x float> @v2f32_diff_consts(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], <float 2.200000e+01, float 2.300000e+01>
+; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+;
+{
+  %1 = fmul float %a, 22.0
+  %2 = fmul float %b, 23.0
+  %3 = insertelement <2 x float> poison, float %1, i32 0
+  %4 = insertelement <2 x float> %3, float %2, i32 1
+  ret <2 x float> %4
+}
+
+define <2 x float> @v2f32_const_splat(float %a, float %b)
+; CHECK-LABEL: define <2 x float> @v2f32_const_splat(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], splat (float 2.200000e+01)
+; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+;
+{
+  %1 = fmul float %a, 22.0
+  %2 = fmul float %b, 22.0
+  %3 = insertelement <2 x float> poison, float %1, i32 0
+  %4 = insertelement <2 x float> %3, float %2, i32 1
+  ret <2 x float> %4
+}
+
+define <4 x double> @v4f64_illegal_type(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <4 x double> @v4f64_illegal_type(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[C]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[D]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], <double 2.100000e+01, double 2.200000e+01, double 2.300000e+01, double 2.400000e+01>
+; CHECK-NEXT:    ret <4 x double> [[TMP5]]
+;
+{
+  %1 = fmul double %a, 21.0
+  %2 = fmul double %b, 22.0
+  %3 = fmul double %c, 23.0
+  %4 = fmul double %d, 24.0
+  %5 = insertelement <4 x double> poison, double %1, i32 0
+  %6 = insertelement <4 x double> %5, double %2, i32 1
+  %7 = insertelement <4 x double> %6, double %3, i32 2
+  %8 = insertelement <4 x double> %7, double %4, i32 3
+  ret <4 x double> %8
+}
+
+define <2 x double> @v2f64_dup_const_vector_case1(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case1(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    ret <2 x double> [[TMP7]]
+;
+{
+  %1 = fmul double %a, 21.0
+  %2 = fmul double %b, 22.0
+  %3 = fmul double %c, 21.0
+  %4 = fmul double %d, 22.0
+  %5 = insertelement <2 x double> poison, double %1, i32 0
+  %6 = insertelement <2 x double> %5, double %2, i32 1
+  %7 = insertelement <2 x double> poison, double %3, i32 0
+  %8 = insertelement <2 x double> %7, double %4, i32 1
+  %9 = fadd <2 x double> %6, %8
+  ret <2 x double> %9
+}
+
+define <2 x double> @v2f64_dup_const_vector_case2(double %a, double %b, double %c, double %d)
+; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case2(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], <double 2.100000e+01, double 2.200000e+01>
+; CHECK-NEXT:    ret <2 x double> [[TMP4]]
+;
+{
+  %1 = fmul double %a, 21.0
+  %2 = fmul double %b, 22.0
+  %3 = fadd double %1, 21.0
+  %4 = fadd double %2, 22.0
+  %5 = insertelement <2 x double> poison, double %3, i32 0
+  %6 = insertelement <2 x double> %5, double %4, i32 1
+  ret <2 x double> %6
+}