[llvm] goldsteinn/shuf of insert (PR #84645)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 9 09:50:16 PST 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/84645
- **[InstCombine] Add tests for scalarizing of blended vector inserts; NFC**
- **[InstCombine] Scalarize `(vec_ops (insert ?, X, Idx))` when only one element is demanded**
>From a489bf4ccfb200132a0f6849ad5001130d115b2b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 9 Mar 2024 00:12:33 -0600
Subject: [PATCH 1/2] [InstCombine] Add tests for scalarizing of blended vector
inserts; NFC
---
.../InstCombine/shufflevector-inselt.ll | 266 ++++++++++++++++++
1 file changed, 266 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
new file mode 100644
index 00000000000000..bdb042e6a483af
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='instcombine' -S %s | FileCheck %s
+
+declare void @use.v2.float(<2 x float>)
+define <2 x float> @replace_through_casts(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %r = shufflevector <2 x float> %ui_v, <2 x float> %si_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_and_binop(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[MUL]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %mul = mul nsw i16 %inp, 5
+ %v0 = insertelement <2 x i16> poison, i16 %mul, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_and_binop_and_unop(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[INP]], i64 1
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %inp, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 2>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_through_splat(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
+; CHECK-NEXT: [[V:%.*]] = shufflevector <2 x i16> [[V0]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = shufflevector <2 x i16> %v0, <2 x i16> poison, <2 x i32> zeroinitializer
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat2(i16 %inp, <2 x i16> %any) {
+; CHECK-LABEL: @replace_through_casts_through_splat2(
+; CHECK-NEXT: ret <2 x float> poison
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = shufflevector <2 x i16> %v0, <2 x i16> %any, <2 x i32> <i32 1, i32 1>
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat_fail(i16 %inp, <2 x i16> %any) {
+; CHECK-LABEL: @replace_through_casts_through_splat_fail(
+; CHECK-NEXT: [[V:%.*]] = shufflevector <2 x i16> [[V0:%.*]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 1
+ %v = shufflevector <2 x i16> %v0, <2 x i16> %any, <2 x i32> <i32 2, i32 2>
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: @replace_through_int_casts(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: [[SI32_V:%.*]] = sext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[ZI32_V]], <2 x i32> [[SI32_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> %dead, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %zi32_v = zext <2 x i16> %v to <2 x i32>
+ %si32_v = sext <2 x i16> %v to <2 x i32>
+ %r = shufflevector <2 x i32> %zi32_v, <2 x i32> %si32_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x i32> %r
+}
+
+define <2 x float> @replace_through_int_bitcasts_todo(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: @replace_through_int_bitcasts_todo(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[BI_V:%.*]] = bitcast <2 x i32> [[ZI32_V]] to <2 x float>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[BI_V]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> %dead, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %zi32_v = zext <2 x i16> %v to <2 x i32>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %bi_v = bitcast <2 x i32> %zi32_v to <2 x float>
+ %r = shufflevector <2 x float> %bi_v, <2 x float> %si_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_todo_fail_multiuse(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_todo_fail_multiuse(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[INP]], i64 1
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
+; CHECK-NEXT: call void @use.v2.float(<2 x float> [[UI_V]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %inp, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ call void @use.v2.float(<2 x float> %ui_v)
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: @replace_through_int_casts_ele0_only(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: [[SI32_V:%.*]] = sext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[ZI32_V]], <2 x i32> [[SI32_V]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %zi32_v = zext <2 x i16> %v to <2 x i32>
+ %si32_v = sext <2 x i16> %v to <2 x i32>
+ %r = shufflevector <2 x i32> %zi32_v, <2 x i32> %si32_v, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @replace_through_int_casts_fail_op0_only(i16 %inp, <2 x i16> %dead) {
+; CHECK-LABEL: @replace_through_int_casts_fail_op0_only(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
+; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[ZI32_V]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %zi32_v = zext <2 x i16> %v to <2 x i32>
+ %si32_v = sext <2 x i16> %v to <2 x i32>
+ %r = shufflevector <2 x i32> %zi32_v, <2 x i32> %si32_v, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %r
+}
+
+define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
+; CHECK-LABEL: @replace_through_binop_fail_cant_speculate(
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[INP:%.*]], 5
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
+; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> <i8 -128, i8 -128>, [[V]]
+; CHECK-NEXT: [[DIV1:%.*]] = xor <2 x i8> [[V]], <i8 123, i8 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[DIV1]], <2 x i8> [[DIV0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %add = add i8 %inp, 5
+ %v0 = insertelement <2 x i8> %any, i8 %inp, i64 0
+ %v = insertelement <2 x i8> %v0, i8 %add, i64 1
+ %div0 = sdiv <2 x i8> <i8 128, i8 128>, %v
+ %div1 = xor <2 x i8> %v, <i8 123, i8 12>
+ %r = shufflevector <2 x i8> %div1, <2 x i8> %div0, <2 x i32> <i32 0, i32 3>
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
+; CHECK-LABEL: @replace_through_binop_preserve_flags(
+; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP:%.*]], 5
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
+; CHECK-NEXT: [[DIV0:%.*]] = add nsw <2 x i8> [[V]], <i8 poison, i8 1>
+; CHECK-NEXT: [[DIV1:%.*]] = xor <2 x i8> [[V]], <i8 123, i8 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[DIV1]], <2 x i8> [[DIV0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %add = xor i8 %inp, 5
+ %v0 = insertelement <2 x i8> %any, i8 %inp, i64 0
+ %v = insertelement <2 x i8> %v0, i8 %add, i64 1
+ %div0 = add nsw <2 x i8> <i8 12, i8 1>, %v
+ %div1 = xor <2 x i8> %v, <i8 123, i8 12>
+ %r = shufflevector <2 x i8> %div1, <2 x i8> %div0, <2 x i32> <i32 0, i32 3>
+ ret <2 x i8> %r
+}
>From b56bb3a4729c65867ef76f8b460cacb69e839880 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 9 Mar 2024 00:12:36 -0600
Subject: [PATCH 2/2] [InstCombine] Scalarize `(vec_ops (insert ?, X, Idx))`
when only one element is demanded
This came as a result of PR #84389. SLP vectorizer can vectorize in a
pattern like:
```
(blend
(vec_ops0... (insert ?,X,0)),
(vec_ops1... (insert ?,Y,1))
)
```
In this case, `vec_ops0...` and `vec_ops1...` are essentially doing
scalar transforms.
Previously, we only handled things like:
`(blend (insert ?,X,0), (insert ?,Y,0))`
This patch extends that to look through `vec_ops...` that can be
scalarized, and if it's possible to scalarize all ops, it transforms
the input to:
```
(blend
(insert ?,(scalar_ops0... X), 0),
(insert ?,(scalar_ops1... Y), 0)
)
```
---
.../InstCombine/InstCombineVectorOps.cpp | 152 ++++++++++++++----
.../insert-extract-shuffle-inseltpoison.ll | 3 +-
.../InstCombine/insert-extract-shuffle.ll | 3 +-
.../shufflevector-div-rem-inseltpoison.ll | 12 +-
.../InstCombine/shufflevector-div-rem.ll | 12 +-
.../InstCombine/shufflevector-inselt.ll | 88 +++++-----
.../invariant-store-vectorization-2.ll | 8 +-
.../X86/alternate-calls-inseltpoison.ll | 27 ++--
.../SLPVectorizer/X86/alternate-calls.ll | 27 ++--
.../X86/alternate-int-inseltpoison.ll | 14 +-
.../SLPVectorizer/X86/hadd-inseltpoison.ll | 18 +--
11 files changed, 219 insertions(+), 145 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 3c4c0f35eb6d48..bf46359accaaa0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2579,6 +2579,97 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
return new ShuffleVectorInst(X, Y, NewMask);
}
+// Extract `(scalar_ops... x)` from `(vector_ops... (insert ?, x, ReqIndexC))`.
+// Returns the scalar for lane `ReqIndexC` of `V`, or nullptr on failure.
+static Value *
+getScalarizationOfInsertElement(Value *V, int ReqIndexC,
+                                InstCombiner::BuilderTy &Builder) {
+  Value *X, *Base;
+  ConstantInt *IndexC;
+  // Found an insertelement with a constant index.
+  if (match(V, m_InsertElt(m_Value(Base), m_Value(X), m_ConstantInt(IndexC)))) {
+    // See if it matches the index we need.
+    if (match(IndexC, m_SpecificInt(ReqIndexC)))
+      return X;
+    // Otherwise continue searching. This is necessary for finding both elements
+    // in the common pattern:
+    //    V0 = (insert poison x, 0)
+    //    V1 = (insert V0, y, 1)
+    return getScalarizationOfInsertElement(Base, ReqIndexC, Builder);
+  }
+
+  // We can search through a splat of a single element for an insert.
+  int SplatIndex;
+  if (match(V, m_Shuffle(m_Value(Base), m_Value(X),
+                         m_SplatOrUndefMask(SplatIndex))) &&
+      SplatIndex >= 0) {
+    if (auto *VType = dyn_cast<FixedVectorType>(V->getType())) {
+      // Chase whichever vector (Base/X) we are splatting from.
+      if (static_cast<unsigned>(SplatIndex) >= VType->getNumElements())
+        return getScalarizationOfInsertElement(
+            X, SplatIndex - VType->getNumElements(), Builder);
+      // New index we need to find is the index we are splatting from.
+      return getScalarizationOfInsertElement(Base, SplatIndex, Builder);
+    }
+    return nullptr;
+  }
+
+  // We don't want to duplicate `vector_ops...` if they have multiple uses.
+  if (!V->hasOneUse())
+    return nullptr;
+  Value *R = nullptr;
+  // Scalarize any unary op.
+  if (match(V, m_UnOp(m_Value(X)))) {
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder))
+      R = Builder.CreateUnOp(cast<UnaryOperator>(V)->getOpcode(), Scalar);
+  }
+
+  // Scalarize any cast but bitcast.
+  // TODO: We skip bitcasts, but they would be okay if they are elementwise.
+  if (isa<CastInst>(V) && !match(V, m_BitCast(m_Value()))) {
+    X = cast<CastInst>(V)->getOperand(0);
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder))
+      R = Builder.CreateCast(cast<CastInst>(V)->getOpcode(), Scalar,
+                             V->getType()->getScalarType());
+  }
+
+  // Binop with a constant.
+  Constant *C;
+  if (match(V, m_c_BinOp(m_Value(X), m_ImmConstant(C)))) {
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (isSafeToSpeculativelyExecute(BO)) {
+      if (auto *Scalar =
+              getScalarizationOfInsertElement(X, ReqIndexC, Builder)) {
+        auto *ScalarC =
+            ConstantExpr::getExtractElement(C, Builder.getInt64(ReqIndexC));
+        BinaryOperator::BinaryOps Opc = BO->getOpcode();
+        // m_c_BinOp matched either order: keep C on its original side.
+        if (BO->getOperand(0) == X)
+          R = Builder.CreateBinOp(Opc, Scalar, ScalarC);
+        else
+          R = Builder.CreateBinOp(Opc, ScalarC, Scalar);
+      }
+    }
+  }
+
+  // Cmp with a constant.
+  CmpInst::Predicate Pred;
+  if (match(V, m_Cmp(Pred, m_Value(X), m_ImmConstant(C)))) {
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder)) {
+      auto *ScalarC =
+          ConstantExpr::getExtractElement(C, Builder.getInt64(ReqIndexC));
+      R = Builder.CreateCmp(Pred, Scalar, ScalarC);
+    }
+  }
+  // TODO: Intrinsics
+
+  // The builder may constant-fold; only a real instruction can take IR flags.
+  if (auto *NewI = dyn_cast_or_null<Instruction>(R))
+    NewI->copyIRFlags(V);
+
+  return R;
+}
+
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
@@ -2616,13 +2707,11 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (NumElts != InpNumElts)
return nullptr;
- // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
- auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
- // We need an insertelement with a constant index.
- if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
- m_ConstantInt(IndexC))))
- return false;
+ // (shuffle (vec_ops... (insert ?, Scalar, IndexC)), V1, Mask)
+ // --> insert V1, (scalar_ops... Scalar), IndexC'
+ auto GetScalarizationOfInsertEle =
+ [&Mask, &NumElts, &IC](Value *V) -> std::pair<Value *, int> {
// Test the shuffle mask to see if it splices the inserted scalar into the
// operand 1 vector of the shuffle.
int NewInsIndex = -1;
@@ -2631,40 +2720,45 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (Mask[i] == -1)
continue;
- // The shuffle takes elements of operand 1 without lane changes.
- if (Mask[i] == NumElts + i)
+ // The shuffle takes elements of operand 1.
+ if (Mask[i] >= NumElts)
continue;
// The shuffle must choose the inserted scalar exactly once.
- if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
- return false;
+ if (NewInsIndex != -1)
+ return {nullptr, -1};
- // The shuffle is placing the inserted scalar into element i.
+ // The shuffle is placing the inserted scalar into element i from operand
+ // 0.
NewInsIndex = i;
}
- assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
+ // Operand is unused.
+ if (NewInsIndex < 0)
+ return {nullptr, -1};
- // Index is updated to the potentially translated insertion lane.
- IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
- return true;
- };
+ Value *Scalar =
+ getScalarizationOfInsertElement(V, Mask[NewInsIndex], IC.Builder);
- // If the shuffle is unnecessary, insert the scalar operand directly into
- // operand 1 of the shuffle. Example:
- // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
- Value *Scalar;
- ConstantInt *IndexC;
- if (isShufflingScalarIntoOp1(Scalar, IndexC))
- return InsertElementInst::Create(V1, Scalar, IndexC);
+ return {Scalar, NewInsIndex};
+ };
- // Try again after commuting shuffle. Example:
- // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
- // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
- std::swap(V0, V1);
+ auto [V0Scalar, V0NewInsertIdx] = GetScalarizationOfInsertEle(V0);
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
- if (isShufflingScalarIntoOp1(Scalar, IndexC))
- return InsertElementInst::Create(V1, Scalar, IndexC);
+ auto [V1Scalar, V1NewInsertIdx] = GetScalarizationOfInsertEle(V1);
+
+ if (V0Scalar != nullptr && V1Scalar != nullptr) {
+ Value *R = IC.Builder.CreateInsertElement(Shuf.getType(), V0Scalar,
+ V0NewInsertIdx);
+ return InsertElementInst::Create(R, V1Scalar,
+ IC.Builder.getInt64(V1NewInsertIdx));
+ } else if (V0Scalar != nullptr) {
+ return InsertElementInst::Create(V1, V0Scalar,
+ IC.Builder.getInt64(V0NewInsertIdx));
+ } else if (V1Scalar != nullptr) {
+ return InsertElementInst::Create(V0, V1Scalar,
+ IC.Builder.getInt64(V1NewInsertIdx));
+ }
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
index 6cbb2a246f5a4a..4bc3bd7bd9c203 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
@@ -547,8 +547,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
-; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
-; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 poison, i32 0, i32 4, i32 poison>
+; CHECK-NEXT: [[SPLAT:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index c87e2e8596c62d..97fcc7a4b43707 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -547,8 +547,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
-; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
-; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 poison, i32 0, i32 4, i32 poison>
+; CHECK-NEXT: [[SPLAT:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
index cc1d23943b09c8..2392bcac6eb7ec 100644
--- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
@@ -88,9 +88,9 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = fdiv float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
@@ -105,9 +105,9 @@ define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = frem float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
index 1699418dcc28b3..f445a0134b3589 100644
--- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
@@ -88,9 +88,9 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = fdiv float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> undef, float %a, i32 0
@@ -105,9 +105,9 @@ define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = frem float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> undef, float %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
index bdb042e6a483af..dae4ca1d8eba79 100644
--- a/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
@@ -5,11 +5,10 @@ declare void @use.v2.float(<2 x float>)
define <2 x float> @replace_through_casts(i16 %inp) {
; CHECK-LABEL: @replace_through_casts(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
-; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -25,12 +24,11 @@ define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
; CHECK-LABEL: @replace_through_casts_and_binop(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[MUL]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
-; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[MUL]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -47,13 +45,12 @@ define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
; CHECK-LABEL: @replace_through_casts_and_binop_and_unop(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[INP]], i64 1
-; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
-; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -70,13 +67,12 @@ define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
; CHECK-LABEL: @replace_through_casts_through_splat(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
-; CHECK-NEXT: [[V:%.*]] = shufflevector <2 x i16> [[V0]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
-; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -129,11 +125,10 @@ define <2 x float> @replace_through_casts_through_splat_fail(i16 %inp, <2 x i16>
define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: @replace_through_int_casts(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
-; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
-; CHECK-NEXT: [[SI32_V:%.*]] = sext <2 x i16> [[V]] to <2 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[ZI32_V]], <2 x i32> [[SI32_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -151,9 +146,9 @@ define <2 x float> @replace_through_int_bitcasts_todo(i16 %inp, <2 x i16> %dead)
; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
; CHECK-NEXT: [[BI_V:%.*]] = bitcast <2 x i32> [[ZI32_V]] to <2 x float>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[BI_V]], <2 x float> [[SI_V]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[BI_V]], float [[TMP1]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -172,11 +167,11 @@ define <2 x float> @replace_through_casts_todo_fail_multiuse(i16 %inp) {
; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[INP]], i64 1
; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
-; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
-; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
; CHECK-NEXT: call void @use.v2.float(<2 x float> [[UI_V]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[INP]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[UI_V_ADD]], float [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -193,12 +188,10 @@ define <2 x float> @replace_through_casts_todo_fail_multiuse(i16 %inp) {
define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: @replace_through_int_casts_ele0_only(
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i16> [[V0]], i16 [[ADD]], i64 1
-; CHECK-NEXT: [[ZI32_V:%.*]] = zext <2 x i16> [[V]] to <2 x i32>
-; CHECK-NEXT: [[SI32_V:%.*]] = sext <2 x i16> [[V]] to <2 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[ZI32_V]], <2 x i32> [[SI32_V]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[INP]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -233,8 +226,8 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d,
; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> <i8 -128, i8 -128>, [[V]]
-; CHECK-NEXT: [[DIV1:%.*]] = xor <2 x i8> [[V]], <i8 123, i8 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[DIV1]], <2 x i8> [[DIV0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%add = add i8 %inp, 5
@@ -249,11 +242,10 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d,
define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
; CHECK-LABEL: @replace_through_binop_preserve_flags(
; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP:%.*]], 5
-; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
-; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
-; CHECK-NEXT: [[DIV0:%.*]] = add nsw <2 x i8> [[V]], <i8 poison, i8 1>
-; CHECK-NEXT: [[DIV1:%.*]] = xor <2 x i8> [[V]], <i8 123, i8 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[DIV1]], <2 x i8> [[DIV0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%add = xor i8 %inp, 5
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
index 50c67040cfb2a9..21b5c08b65c113 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
@@ -120,7 +120,7 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional_inv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[NTRUNC]], [[K:%.*]]
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
@@ -136,9 +136,9 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 3
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 3
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 3
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -165,7 +165,7 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
; CHECK: cond_store_k:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
-; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE]] ], [ [[K]], [[COND_STORE_K]] ]
+; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE_K]] ], [ [[K]], [[COND_STORE]] ]
; CHECK-NEXT: store i32 [[STOREVAL]], ptr [[A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
index 6c21cc1cfc5be8..2793df6be9e712 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
@@ -18,14 +18,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE-NEXT: ret <8 x float> [[R71]]
;
; SLM-LABEL: @ceil_floor(
@@ -39,14 +38,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SLM-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; SLM-NEXT: ret <8 x float> [[R71]]
;
; AVX-LABEL: @ceil_floor(
@@ -83,14 +81,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX2-NEXT: ret <8 x float> [[R71]]
;
%a0 = extractelement <8 x float> %a, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
index bc5bcee361168a..f4d66a13d07c05 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
@@ -18,14 +18,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; SSE-NEXT: ret <8 x float> [[R71]]
;
; SLM-LABEL: @ceil_floor(
@@ -39,14 +38,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SLM-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; SLM-NEXT: ret <8 x float> [[R71]]
;
; AVX-LABEL: @ceil_floor(
@@ -83,14 +81,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
-; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R23:%.*]] = insertelement <8 x float> [[TMP7]], float [[AB0]], i64 0
; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX2-NEXT: ret <8 x float> [[R71]]
;
%a0 = extractelement <8 x float> %a, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
index 8e878f3f8b80fa..292ede5dbae219 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
@@ -448,12 +448,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], <i32 8, i32 16>
-; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1
-; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R32:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[AB1]], i64 1
; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5
; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 8, i32 9>
+; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 5, i32 8, i32 9>
; AVX2-NEXT: ret <8 x i32> [[R71]]
;
; AVX512-LABEL: @sdiv_v8i32_undefs(
@@ -465,12 +464,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], <i32 8, i32 16>
-; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1
-; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX512-NEXT: [[R32:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[AB1]], i64 1
; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5
; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 8, i32 9>
+; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 5, i32 8, i32 9>
; AVX512-NEXT: ret <8 x i32> [[R71]]
;
%a0 = extractelement <8 x i32> %a, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index 4a9f717918a029..b59edab0c1f306 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -239,24 +239,22 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b
; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
; AVX1-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
-; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2>
-; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
+; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 2>
+; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; AVX1-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0
-; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; AVX1-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
+; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX1-NEXT: [[R031:%.*]] = insertelement <4 x double> [[TMP4]], double [[R0]], i64 0
; AVX1-NEXT: ret <4 x double> [[R031]]
;
; AVX2-LABEL: @test_v4f64_partial_swizzle(
; AVX2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
; AVX2-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
; AVX2-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 2>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; AVX2-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0
-; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; AVX2-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
+; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX2-NEXT: [[R031:%.*]] = insertelement <4 x double> [[TMP4]], double [[R0]], i64 0
; AVX2-NEXT: ret <4 x double> [[R031]]
;
; AVX512-LABEL: @test_v4f64_partial_swizzle(
More information about the llvm-commits
mailing list