[llvm] [VectorCombine] Handle shuffle of selects (PR #128032)

via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 22 12:45:38 PST 2025


https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/128032

>From 2a4dfdf389ad5363aae2f270138954c4e7d342ca Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:11 +0900
Subject: [PATCH 1/4] add pre-test

---
 .../VectorCombine/X86/shuffle-of-selects.ll   | 319 ++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll

diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
new file mode 100644
index 0000000000000..3bde1a59a4db0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
+; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i16> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
+  %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
+  %res = shufflevector <2 x i16> %select.xz, <2 x i16> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i16> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
+  %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
+  %res = shufflevector <4 x i16> %select.xz, <4 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i16> [[RES]]
+;
+  %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+  %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+  %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %res
+}
+
+define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    ret <16 x i16> [[RES]]
+;
+  %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+  %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+  %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %res
+}
+
+define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %res
+}
+
+define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z
+  %select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x
+  %res = shufflevector <4 x i32> %select.xz, <4 x i32> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %res
+}
+
+define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i64> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+  %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+  %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %res
+}
+
+define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+  %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+  %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %res
+}
+
+define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <8 x float> @src_v4tov8_float(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z
+  %select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x
+  %res = shufflevector <4 x float> %select.xz, <4 x float> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %res
+}
+
+define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+  %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+  %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %res
+}
+
+define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <4 x double> @src_v2tov4_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+  %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+  %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %res
+}
+
+; FMF Flags
+define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+
+; Negative - Vector order
+define <4 x i32> @src_v2tov4_i32_backward(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_backward(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_in_same_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_in_same_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_to_other_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  ret <4 x i32> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX2: {{.*}}
+; AVX512: {{.*}}
+; SSE: {{.*}}

>From dc710495bb5b94a8eb23d519790068ee9355e54d Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:24 +0900
Subject: [PATCH 2/4] [VectorCombine] Handle shuffle of selects

(shuffle (select c1,t1,f1), (select c2,t2,f2), m)
-> (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))

A SelectInst on vectors operates element-wise:
`V'select[i] = Condition[i] ? V'True[i] : V'False[i]`.

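If a ShuffleVector is performed on two selects, each result lane i takes
lane Mask[i] of the concatenated select results:
`V'[i] = V'select[Mask[i]] = Condition[Mask[i]] ? V'True[Mask[i]] : V'False[Mask[i]]`
(where Condition, V'True and V'False range over the concatenation of the
corresponding operands of both selects).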

That is why a ShuffleVector of two SelectInsts is equivalent to first
applying the ShuffleVector to the Condition/True/False operands and then
performing a single SelectInst on the shuffled results.

This patch implements the transformation described above.

Proof: https://alive2.llvm.org/ce/z/97wfHp
Fixes: https://github.com/llvm/llvm-project/issues/120775
---
 .../Transforms/Vectorize/VectorCombine.cpp    |  52 +++
 .../AArch64/shuffletoidentity.ll              |  25 +-
 .../VectorCombine/X86/shuffle-of-selects.ll   | 398 ++++++++++++++----
 3 files changed, 374 insertions(+), 101 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 746742e14d080..5e4b4b5a7e4b2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -119,6 +119,7 @@ class VectorCombine {
   bool foldConcatOfBoolMasks(Instruction &I);
   bool foldPermuteOfBinops(Instruction &I);
   bool foldShuffleOfBinops(Instruction &I);
+  bool foldShuffleOfSelects(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
@@ -1899,6 +1900,56 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   return true;
 }
 
+/// Try to convert,
+/// (shuffle(select(c1,t1,f1)), (select(c2,t2,f2)), m) into
+/// (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))
+bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
+  ArrayRef<int> Mask;
+  Value *C1, *T1, *F1, *C2, *T2, *F2;
+  if (!match(&I, m_Shuffle(
+                     m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))),
+                     m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))),
+                     m_Mask(Mask))))
+    return false;
+
+  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
+  auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
+  if (!C1VecTy || !C2VecTy)
+    return false;
+
+  auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
+  auto SelOp = Instruction::Select;
+  InstructionCost OldCost = TTI.getCmpSelInstrCost(
+      SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+                                {I.getOperand(0), I.getOperand(1)}, &I);
+
+  auto *C1C2VecTy = cast<FixedVectorType>(
+      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+  InstructionCost NewCost =
+      TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+  NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+  if (NewCost > OldCost)
+    return false;
+
+  Value *ShuffleCmp = Builder.CreateShuffleVector(C1, C2, Mask);
+  Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
+  Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
+  Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+
+  replaceValue(I, *NewSel);
+  return true;
+}
+
 /// Try to convert "shuffle (castop), (castop)" with a shared castop operand
 /// into "castop (shuffle)".
 bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
@@ -3352,6 +3403,7 @@ bool VectorCombine::run() {
       case Instruction::ShuffleVector:
         MadeChange |= foldPermuteOfBinops(I);
         MadeChange |= foldShuffleOfBinops(I);
+        MadeChange |= foldShuffleOfSelects(I);
         MadeChange |= foldShuffleOfCastops(I);
         MadeChange |= foldShuffleOfShuffles(I);
         MadeChange |= foldShuffleOfIntrinsics(I);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 09875c5e0af40..e1ede6a3aab5d 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -451,18 +451,18 @@ define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 
 define <8 x i8> @icmpsel_diffentcond(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: @icmpsel_diffentcond(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
 ; CHECK-NEXT:    [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
 ; CHECK-NEXT:    [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    [[ABT1:%.*]] = icmp slt <4 x i8> [[AT]], [[BT]]
-; CHECK-NEXT:    [[ABB1:%.*]] = icmp ult <4 x i8> [[AB]], [[BB]]
-; CHECK-NEXT:    [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]]
-; CHECK-NEXT:    [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]]
+; CHECK-NEXT:    [[CB1:%.*]] = shufflevector <8 x i8> [[C1:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[CT1:%.*]] = shufflevector <8 x i8> [[C1]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[DB1:%.*]] = shufflevector <8 x i8> [[D1:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[DT1:%.*]] = shufflevector <8 x i8> [[D1]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[ABT1:%.*]] = icmp slt <4 x i8> [[CT]], [[DT]]
+; CHECK-NEXT:    [[ABB1:%.*]] = icmp ult <4 x i8> [[CB]], [[DB]]
+; CHECK-NEXT:    [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT1]], <4 x i8> [[DT1]]
+; CHECK-NEXT:    [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB1]], <4 x i8> [[DB1]]
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
@@ -992,14 +992,15 @@ define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %va
 }
 
 ; Peek through (repeated) bitcasts to find a common source value.
+; TODO : We can remove the Shufflevector for A, B.
 define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: @bitcast_smax_v8i32_v4i32(
 ; CHECK-NEXT:    [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
-; CHECK-NEXT:    [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
-; CHECK-NEXT:    [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
-; CHECK-NEXT:    [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
+; CHECK-NEXT:    [[CONCAT:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP3]], <8 x i32> [[TMP5]]
 ; CHECK-NEXT:    [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
 ; CHECK-NEXT:    ret <4 x i64> [[RES]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 3bde1a59a4db0..36d2ccd54b13a 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -4,12 +4,28 @@
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
 
 define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
-; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i16> [[RES]]
+; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; SSE-NEXT:    ret <4 x i16> [[RES]]
+;
+; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <4 x i16> [[RES]]
+;
+; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x i16> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
   %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
@@ -18,12 +34,28 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1
 }
 
 define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v4tov8_i16(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT:    ret <8 x i16> [[RES]]
 ;
   %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
   %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
@@ -32,12 +64,28 @@ define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i1
 }
 
 define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v8tov8_i16(
+; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT:    ret <8 x i16> [[RES]]
 ;
   %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
   %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
@@ -47,7 +95,7 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
 
 define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
 ; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -60,12 +108,28 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x
 }
 
 define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
-; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i32> [[RES]]
+; SSE-LABEL: define <4 x i32> @src_v2tov4_i32(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; SSE-NEXT:    ret <4 x i32> [[RES]]
+;
+; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; AVX2-NEXT:    ret <4 x i32> [[RES]]
+;
+; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x i32> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
   %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
@@ -88,12 +152,28 @@ define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i3
 }
 
 define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
-; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    ret <2 x i64> [[RES]]
+; SSE-LABEL: define <2 x i64> @src_v2tov2_i64(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; SSE-NEXT:    ret <2 x i64> [[RES]]
+;
+; AVX2-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; AVX2-NEXT:    ret <2 x i64> [[RES]]
+;
+; AVX512-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT:    ret <2 x i64> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
   %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
@@ -116,12 +196,28 @@ define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
 }
 
 define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -144,12 +240,28 @@ define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <
 }
 
 define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; SSE-LABEL: define <2 x double> @src_v2tov2_double(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; SSE-NEXT:    ret <2 x double> [[RES]]
+;
+; AVX2-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; AVX2-NEXT:    ret <2 x double> [[RES]]
+;
+; AVX512-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT:    ret <2 x double> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
   %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
@@ -173,12 +285,28 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
 
 ; FMF Flags
 define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -187,12 +315,28 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 }
 
 define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -201,12 +345,28 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 }
 
 define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -215,12 +375,28 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 }
 
 define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -229,12 +405,28 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 }
 
 define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -243,12 +435,28 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 }
 
 define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -257,12 +465,28 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 }
 
 define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -313,7 +537,3 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b
   %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
   ret <4 x i32> %res
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX2: {{.*}}
-; AVX512: {{.*}}
-; SSE: {{.*}}

>From 9da4c71b2c5ac8dc4de880257bcde6812a7d6c93 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:26:22 +0900
Subject: [PATCH 3/4] Preserve Fast-Math Flags

---
 .../Transforms/Vectorize/VectorCombine.cpp    |  8 ++++++
 .../VectorCombine/X86/shuffle-of-selects.ll   | 28 +++++++++----------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5e4b4b5a7e4b2..0df6f4101e1f4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1918,6 +1918,12 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   if (!C1VecTy || !C2VecTy)
     return false;
 
+  auto *Select0 = cast<Instruction>(I.getOperand(0));
+  if (auto *SI0FOp = dyn_cast<FPMathOperator>(Select0))
+    if (auto *SI1FOp = dyn_cast<FPMathOperator>((I.getOperand(1))))
+      if (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())
+        return false;
+
   auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
   auto SelOp = Instruction::Select;
   InstructionCost OldCost = TTI.getCmpSelInstrCost(
@@ -1945,6 +1951,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
   Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
   Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+  if (isa<FPMathOperator>(NewSel))
+    cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
 
   replaceValue(I, *NewSel);
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 36d2ccd54b13a..91f7fdd601cdf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -290,7 +290,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -298,7 +298,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -320,7 +320,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -328,7 +328,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -350,7 +350,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -358,7 +358,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -380,7 +380,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -388,7 +388,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -410,7 +410,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -418,7 +418,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -440,7 +440,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -448,7 +448,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -470,7 +470,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
@@ -478,7 +478,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(

>From e6b56c9031996b0724a56e9f8088aad60d221d6e Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:28:44 +0900
Subject: [PATCH 4/4] Append the transformed result (NewSel) to the Worklist

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0df6f4101e1f4..a0f77e3c2ba93 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1954,6 +1954,7 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   if (isa<FPMathOperator>(NewSel))
     cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
 
+  Worklist.pushValue(NewSel);
   replaceValue(I, *NewSel);
   return true;
 }



More information about the llvm-commits mailing list