[llvm] [VectorCombine] Handle shuffle of selects (PR #128032)

via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 22 13:18:08 PST 2025


https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/128032

>From 2a4dfdf389ad5363aae2f270138954c4e7d342ca Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:11 +0900
Subject: [PATCH 1/4] add pre-test

---
 .../VectorCombine/X86/shuffle-of-selects.ll   | 319 ++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll

diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
new file mode 100644
index 0000000000000..3bde1a59a4db0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
+; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i16> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
+  %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
+  %res = shufflevector <2 x i16> %select.xz, <2 x i16> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i16> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
+  %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
+  %res = shufflevector <4 x i16> %select.xz, <4 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i16> [[RES]]
+;
+  %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+  %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+  %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %res
+}
+
+define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    ret <16 x i16> [[RES]]
+;
+  %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+  %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+  %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %res
+}
+
+define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %res
+}
+
+define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z
+  %select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x
+  %res = shufflevector <4 x i32> %select.xz, <4 x i32> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %res
+}
+
+define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i64> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+  %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+  %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %res
+}
+
+define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+  %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+  %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %res
+}
+
+define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <8 x float> @src_v4tov8_float(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+;
+  %select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z
+  %select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x
+  %res = shufflevector <4 x float> %select.xz, <4 x float> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %res
+}
+
+define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+  %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+  %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %res
+}
+
+define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <4 x double> @src_v2tov4_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+  %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+  %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %res
+}
+
+; FMF Flags
+define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+  %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+  %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %res
+}
+
+
+; Negative - Vector order
+define <4 x i32> @src_v2tov4_i32_backward(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_backward(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_in_same_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_in_same_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_to_other_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+  %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+  %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  ret <4 x i32> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX2: {{.*}}
+; AVX512: {{.*}}
+; SSE: {{.*}}

>From 4c989b9e45ba01e5545160bb217a9ca874ebc46f Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:24 +0900
Subject: [PATCH 2/4] [VectorCombine] Handle shuffle of selects

(shuffle(select(c1,t1,f1)), (select(c2,t2,f2)), m)
-> (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))

The behaviour of SelectInst on vectors is the same as for
`V'select[i] = Condition[i] ? V'True[i] : V'False[i]`.

If a ShuffleVector is performed on two selects, it will be like:
`V'[mask] = (V'select[i] = Condition[i] ? V'True[i] : V'False[i])`

That's why a ShuffleVector with two SelectInst is equivalent to
first ShuffleVector Condition/True/False and then SelectInst that
result.

This patch implements the transforming described above.

Proof: https://alive2.llvm.org/ce/z/97wfHp
Fixed: https://github.com/llvm/llvm-project/issues/120775
---
 .../Transforms/Vectorize/VectorCombine.cpp    |  52 +++
 .../VectorCombine/X86/shuffle-of-selects.ll   | 398 ++++++++++++++----
 2 files changed, 361 insertions(+), 89 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 746742e14d080..5e4b4b5a7e4b2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -119,6 +119,7 @@ class VectorCombine {
   bool foldConcatOfBoolMasks(Instruction &I);
   bool foldPermuteOfBinops(Instruction &I);
   bool foldShuffleOfBinops(Instruction &I);
+  bool foldShuffleOfSelects(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
@@ -1899,6 +1900,56 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   return true;
 }
 
+/// Try to convert,
+/// (shuffle(select(c1,t1,f1)), (select(c2,t2,f2)), m) into
+/// (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))
+bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
+  ArrayRef<int> Mask;
+  Value *C1, *T1, *F1, *C2, *T2, *F2;
+  if (!match(&I, m_Shuffle(
+                     m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))),
+                     m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))),
+                     m_Mask(Mask))))
+    return false;
+
+  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
+  auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
+  if (!C1VecTy || !C2VecTy)
+    return false;
+
+  auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
+  auto SelOp = Instruction::Select;
+  InstructionCost OldCost = TTI.getCmpSelInstrCost(
+      SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+                                {I.getOperand(0), I.getOperand(1)}, &I);
+
+  auto *C1C2VecTy = cast<FixedVectorType>(
+      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+  InstructionCost NewCost =
+      TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+  NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+  if (NewCost > OldCost)
+    return false;
+
+  Value *ShuffleCmp = Builder.CreateShuffleVector(C1, C2, Mask);
+  Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
+  Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
+  Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+
+  replaceValue(I, *NewSel);
+  return true;
+}
+
 /// Try to convert "shuffle (castop), (castop)" with a shared castop operand
 /// into "castop (shuffle)".
 bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
@@ -3352,6 +3403,7 @@ bool VectorCombine::run() {
       case Instruction::ShuffleVector:
         MadeChange |= foldPermuteOfBinops(I);
         MadeChange |= foldShuffleOfBinops(I);
+        MadeChange |= foldShuffleOfSelects(I);
         MadeChange |= foldShuffleOfCastops(I);
         MadeChange |= foldShuffleOfShuffles(I);
         MadeChange |= foldShuffleOfIntrinsics(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 3bde1a59a4db0..36d2ccd54b13a 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -4,12 +4,28 @@
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
 
 define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
-; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i16> [[RES]]
+; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; SSE-NEXT:    ret <4 x i16> [[RES]]
+;
+; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <4 x i16> [[RES]]
+;
+; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x i16> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
   %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
@@ -18,12 +34,28 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1
 }
 
 define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v4tov8_i16(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT:    ret <8 x i16> [[RES]]
 ;
   %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
   %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
@@ -32,12 +64,28 @@ define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i1
 }
 
 define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v8tov8_i16(
+; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT:    [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT:    ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT:    ret <8 x i16> [[RES]]
 ;
   %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
   %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
@@ -47,7 +95,7 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
 
 define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
 ; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -60,12 +108,28 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x
 }
 
 define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
-; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i32> [[RES]]
+; SSE-LABEL: define <4 x i32> @src_v2tov4_i32(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; SSE-NEXT:    ret <4 x i32> [[RES]]
+;
+; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; AVX2-NEXT:    ret <4 x i32> [[RES]]
+;
+; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x i32> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
   %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
@@ -88,12 +152,28 @@ define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i3
 }
 
 define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
-; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    ret <2 x i64> [[RES]]
+; SSE-LABEL: define <2 x i64> @src_v2tov2_i64(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; SSE-NEXT:    ret <2 x i64> [[RES]]
+;
+; AVX2-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; AVX2-NEXT:    ret <2 x i64> [[RES]]
+;
+; AVX512-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT:    ret <2 x i64> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
   %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
@@ -116,12 +196,28 @@ define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
 }
 
 define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -144,12 +240,28 @@ define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <
 }
 
 define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; SSE-LABEL: define <2 x double> @src_v2tov2_double(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; SSE-NEXT:    ret <2 x double> [[RES]]
+;
+; AVX2-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; AVX2-NEXT:    ret <2 x double> [[RES]]
+;
+; AVX512-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT:    ret <2 x double> [[RES]]
 ;
   %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
   %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
@@ -173,12 +285,28 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
 
 ; FMF Flags
 define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -187,12 +315,28 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 }
 
 define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -201,12 +345,28 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 }
 
 define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -215,12 +375,28 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 }
 
 define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -229,12 +405,28 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 }
 
 define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -243,12 +435,28 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 }
 
 define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -257,12 +465,28 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 }
 
 define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT:    [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT:    [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT:    ret <4 x float> [[RES]]
 ;
   %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
   %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -313,7 +537,3 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b
   %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
   ret <4 x i32> %res
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX2: {{.*}}
-; AVX512: {{.*}}
-; SSE: {{.*}}

>From 4a551ed4e94a4c4b6b9a287d50efdee2f3571af2 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:26:22 +0900
Subject: [PATCH 3/4] Preserve Fast-Math Flags

---
 .../Transforms/Vectorize/VectorCombine.cpp    |  8 ++++++
 .../VectorCombine/X86/shuffle-of-selects.ll   | 28 +++++++++----------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5e4b4b5a7e4b2..0df6f4101e1f4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1918,6 +1918,12 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   if (!C1VecTy || !C2VecTy)
     return false;
 
+  auto *Select0 = cast<Instruction>(I.getOperand(0));
+  if (auto *SI0FOp = dyn_cast<FPMathOperator>(Select0))
+    if (auto *SI1FOp = dyn_cast<FPMathOperator>((I.getOperand(1))))
+      if (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())
+        return false;
+
   auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
   auto SelOp = Instruction::Select;
   InstructionCost OldCost = TTI.getCmpSelInstrCost(
@@ -1945,6 +1951,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
   Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
   Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+  if (isa<FPMathOperator>(NewSel))
+    cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
 
   replaceValue(I, *NewSel);
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 36d2ccd54b13a..91f7fdd601cdf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -290,7 +290,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -298,7 +298,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -320,7 +320,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -328,7 +328,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -350,7 +350,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -358,7 +358,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -380,7 +380,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -388,7 +388,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -410,7 +410,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -418,7 +418,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -440,7 +440,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -448,7 +448,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -470,7 +470,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT:    [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; SSE-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
@@ -478,7 +478,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
 ; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT:    [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT:    [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
 ; AVX2-NEXT:    ret <4 x float> [[RES]]
 ;
 ; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(

>From 17a6939bf4c5be34900af34655867e7aa0b9e0f4 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:28:44 +0900
Subject: [PATCH 4/4] NewSel the transformed result append to Worklist

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0df6f4101e1f4..a0f77e3c2ba93 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1954,6 +1954,7 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
   if (isa<FPMathOperator>(NewSel))
     cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
 
+  Worklist.pushValue(NewSel);
   replaceValue(I, *NewSel);
   return true;
 }



More information about the llvm-commits mailing list