[llvm] 9acaaeb - [VectorCombine][X86] Add insert(binop(x,y),binop(a,b),idx) test coverage for #124909
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 07:22:52 PST 2025
Author: Simon Pilgrim
Date: 2025-01-30T15:12:17Z
New Revision: 9acaaebcdd39f7584a20388f1bad8a9f721bf9d0
URL: https://github.com/llvm/llvm-project/commit/9acaaebcdd39f7584a20388f1bad8a9f721bf9d0
DIFF: https://github.com/llvm/llvm-project/commit/9acaaebcdd39f7584a20388f1bad8a9f721bf9d0.diff
LOG: [VectorCombine][X86] Add insert(binop(x,y),binop(a,b),idx) test coverage for #124909
Added:
llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll
new file mode 100644
index 00000000000000..7d4d3ec7d0585b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-vector.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
+
+define <2 x double> @insert1_v2f64_f64_fdiv(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert1_v2f64_f64_fdiv(
+; CHECK-NEXT: [[S:%.*]] = fdiv double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fdiv <2 x double> [[TMP1:%.*]], [[TMP2:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R]], double [[S]], i32 1
+; CHECK-NEXT: ret <2 x double> [[R1]]
+;
+ %s = fdiv double %s0, %s1
+ %v = fdiv <2 x double> %v0, %v1
+ %r = insertelement <2 x double> %v, double %s, i32 1
+ ret <2 x double> %r
+}
+
+define <4 x i32> @insert0_v4i32_i32_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
+; CHECK-LABEL: @insert0_v4i32_i32_add(
+; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %s = add i32 %s0, %s1
+ %v = add <4 x i32> %v0, %v1
+ %r = insertelement <4 x i32> %v, i32 %s, i32 0
+ ret <4 x i32> %r
+}
+
+define <16 x i16> @insert9_v16i16_i16_add(<16 x i16> %v0, <16 x i16> %v1, i16 %s0, i16 %s1) {
+; CHECK-LABEL: @insert9_v16i16_i16_add(
+; CHECK-NEXT: [[S:%.*]] = add i16 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = add <16 x i16> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i16> [[V]], i16 [[S]], i32 9
+; CHECK-NEXT: ret <16 x i16> [[R]]
+;
+ %s = add i16 %s0, %s1
+ %v = add <16 x i16> %v0, %v1
+ %r = insertelement <16 x i16> %v, i16 %s, i32 9
+ ret <16 x i16> %r
+}
+
+; Merge flags
+define <4 x float> @insert0_v4f32_f32_fadd_common_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
+; CHECK-LABEL: @insert0_v4f32_f32_fadd_common_flags(
+; CHECK-NEXT: [[S:%.*]] = fadd fast float [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[TMP1:%.*]], [[TMP2:%.*]]
+; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R]], float [[S]], i32 0
+; CHECK-NEXT: ret <4 x float> [[R1]]
+;
+ %s = fadd fast float %s0, %s1
+ %v = fadd fast <4 x float> %v0, %v1
+ %r = insertelement <4 x float> %v, float %s, i32 0
+ ret <4 x float> %r
+}
+
+; Merge (shared) flags
+define <4 x float> @insert1_v4f32_f32_fsub_mixed_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
+; CHECK-LABEL: @insert1_v4f32_f32_fsub_mixed_flags(
+; CHECK-NEXT: [[S:%.*]] = fsub nnan nsz float [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = fsub nnan ninf <4 x float> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[V]], float [[S]], i32 1
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %s = fsub nnan nsz float %s0, %s1
+ %v = fsub nnan ninf <4 x float> %v0, %v1
+ %r = insertelement <4 x float> %v, float %s, i32 1
+ ret <4 x float> %r
+}
+
+; TODO: Fold equivalent opcodes
+define <4 x i32> @insert0_v4i32_i32_or_disjoint_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
+; CHECK-LABEL: @insert0_v4i32_i32_or_disjoint_add(
+; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = or disjoint <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %s = add i32 %s0, %s1
+ %v = or disjoint <4 x i32> %v0, %v1
+ %r = insertelement <4 x i32> %v, i32 %s, i32 0
+ ret <4 x i32> %r
+}
+
+; Negative - multi use
+define <2 x double> @insert0_v2f64_f64_fmul_multiuse(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert0_v2f64_f64_fmul_multiuse(
+; CHECK-NEXT: [[S:%.*]] = fmul double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = fmul <2 x double> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
+; CHECK-NEXT: call void @use_f64(double [[S]])
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %s = fmul double %s0, %s1
+ %v = fmul <2 x double> %v0, %v1
+ %r = insertelement <2 x double> %v, double %s, i32 0
+ call void @use_f64(double %s)
+ ret <2 x double> %r
+}
+declare void @use_f64(double)
+
+; Negative - multi use
+define <2 x i64> @insert0_v2i64_i64_add_multiuse(<2 x i64> %v0, <2 x i64> %v1, i64 %s0, i64 %s1) {
+; CHECK-LABEL: @insert0_v2i64_i64_add_multiuse(
+; CHECK-NEXT: [[S:%.*]] = add i64 [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> [[V]], i64 [[S]], i32 0
+; CHECK-NEXT: call void @use_v2i64(<2 x i64> [[V]])
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %s = add i64 %s0, %s1
+ %v = add <2 x i64> %v0, %v1
+ %r = insertelement <2 x i64> %v, i64 %s, i32 0
+ call void @use_v2i64(<2 x i64> %v)
+ ret <2 x i64> %r
+}
+declare void @use_v2i64(<2 x i64>)
+
+; Negative - binop mismatch
+define <2 x double> @insert0_v2f64_f64_fadd_fsub(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
+; CHECK-LABEL: @insert0_v2f64_f64_fadd_fsub(
+; CHECK-NEXT: [[S:%.*]] = fsub double [[S0:%.*]], [[S1:%.*]]
+; CHECK-NEXT: [[V:%.*]] = fadd <2 x double> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %s = fsub double %s0, %s1
+ %v = fadd <2 x double> %v0, %v1
+ %r = insertelement <2 x double> %v, double %s, i32 0
+ ret <2 x double> %r
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
+; SSE2: {{.*}}
+; SSE4: {{.*}}
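
For context, the pattern under test comes from #124909: a scalar binop
inserted into the result of the matching vector binop can be rewritten as a
single vector binop on inserted operands. A minimal sketch of the intended
rewrite on the add case above (the folded form is inferred from the pattern
named in the subject line; it is not part of this commit, and the value
names are illustrative):

  ; Before (as in insert0_v4i32_i32_add):
  %s = add i32 %s0, %s1
  %v = add <4 x i32> %v0, %v1
  %r = insertelement <4 x i32> %v, i32 %s, i32 0

  ; After - one vector add: lane 0 computes %s0 + %s1,
  ; the remaining lanes compute %v0 + %v1:
  %x = insertelement <4 x i32> %v0, i32 %s0, i32 0
  %y = insertelement <4 x i32> %v1, i32 %s1, i32 0
  %f = add <4 x i32> %x, %y

The negative tests cover cases where this rewrite is unprofitable or
invalid: with extra uses the original binops must stay live anyway, and
mismatched opcodes cannot merge into one instruction. The fast-math tests
presumably expect the merged binop to carry the intersection of the two
flag sets (only nnan in the mixed-flags case), and the TODO case could
still fold because an 'or disjoint' is equivalent to an 'add'.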