[llvm] [VectorCombine] Handle shuffle of selects (PR #128032)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 22 13:18:08 PST 2025
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/128032
>From 2a4dfdf389ad5363aae2f270138954c4e7d342ca Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:11 +0900
Subject: [PATCH 1/4] add pre-test
---
.../VectorCombine/X86/shuffle-of-selects.ll | 319 ++++++++++++++++++
1 file changed, 319 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
new file mode 100644
index 0000000000000..3bde1a59a4db0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
+; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i16> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
+ %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
+ %res = shufflevector <2 x i16> %select.xz, <2 x i16> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %res
+}
+
+define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i16> [[RES]]
+;
+ %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
+ %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
+ %res = shufflevector <4 x i16> %select.xz, <4 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i16> [[RES]]
+;
+ %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+ %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+ %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
+; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i16> [[RES]]
+;
+ %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
+ %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
+ %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %res
+}
+
+define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+ %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+ %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z
+ %select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x
+ %res = shufflevector <4 x i32> %select.xz, <4 x i32> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %res
+}
+
+define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+ %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+ %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %res
+}
+
+define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
+ %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
+ %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %res
+}
+
+define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <8 x float> @src_v4tov8_float(
+; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x float> [[RES]]
+;
+ %select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z
+ %select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x
+ %res = shufflevector <4 x float> %select.xz, <4 x float> %select.yx, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %res
+}
+
+define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+ %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+ %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %res
+}
+
+define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <4 x double> @src_v2tov4_double(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x double> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
+ %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
+ %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %res
+}
+
+; FMF Flags
+define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
+ %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
+ %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+
+; Negative - Vector order
+define <4 x i32> @src_v2tov4_i32_backward(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_backward(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+ %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+ %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_in_same_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_in_same_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+ %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+ %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_to_other_vector(
+; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
+ %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
+ %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+ ret <4 x i32> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX2: {{.*}}
+; AVX512: {{.*}}
+; SSE: {{.*}}
>From 4c989b9e45ba01e5545160bb217a9ca874ebc46f Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 21 Feb 2025 02:13:24 +0900
Subject: [PATCH 2/4] [VectorCombine] Handle shuffle of selects
(shuffle(select(c1,t1,f1)), (select(c2,t2,f2)), m)
-> (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))
The behaviour of SelectInst on vectors is the same as for
`V'select[i] = Condition[i] ? V'True[i] : V'False[i]`.
If a ShuffleVector is performed on two selects, it will be like:
`V'[mask] = (V'select[i] = Condition[i] ? V'True[i] : V'False[i])`
That's why a ShuffleVector with two SelectInst is equivalent to
first ShuffleVector Condition/True/False and then SelectInst that
result.
This patch implements the transforming described above.
Proof: https://alive2.llvm.org/ce/z/97wfHp
Fixed: https://github.com/llvm/llvm-project/issues/120775
---
.../Transforms/Vectorize/VectorCombine.cpp | 52 +++
.../VectorCombine/X86/shuffle-of-selects.ll | 398 ++++++++++++++----
2 files changed, 361 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 746742e14d080..5e4b4b5a7e4b2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -119,6 +119,7 @@ class VectorCombine {
bool foldConcatOfBoolMasks(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
+ bool foldShuffleOfSelects(Instruction &I);
bool foldShuffleOfCastops(Instruction &I);
bool foldShuffleOfShuffles(Instruction &I);
bool foldShuffleOfIntrinsics(Instruction &I);
@@ -1899,6 +1900,56 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
return true;
}
+/// Try to convert,
+/// (shuffle(select(c1,t1,f1)), (select(c2,t2,f2)), m) into
+/// (select (shuffle c1,c2,m), (shuffle t1,t2,m), (shuffle f1,f2,m))
+bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
+ ArrayRef<int> Mask;
+ Value *C1, *T1, *F1, *C2, *T2, *F2;
+ if (!match(&I, m_Shuffle(
+ m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))),
+ m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))),
+ m_Mask(Mask))))
+ return false;
+
+ auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
+ auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
+ auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
+ if (!C1VecTy || !C2VecTy)
+ return false;
+
+ auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
+ auto SelOp = Instruction::Select;
+ InstructionCost OldCost = TTI.getCmpSelInstrCost(
+ SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+ {I.getOperand(0), I.getOperand(1)}, &I);
+
+ auto *C1C2VecTy = cast<FixedVectorType>(
+ toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+ InstructionCost NewCost =
+ TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+ NewCost +=
+ TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+ NewCost +=
+ TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+ NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ if (NewCost > OldCost)
+ return false;
+
+ Value *ShuffleCmp = Builder.CreateShuffleVector(C1, C2, Mask);
+ Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
+ Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
+ Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+
+ replaceValue(I, *NewSel);
+ return true;
+}
+
/// Try to convert "shuffle (castop), (castop)" with a shared castop operand
/// into "castop (shuffle)".
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
@@ -3352,6 +3403,7 @@ bool VectorCombine::run() {
case Instruction::ShuffleVector:
MadeChange |= foldPermuteOfBinops(I);
MadeChange |= foldShuffleOfBinops(I);
+ MadeChange |= foldShuffleOfSelects(I);
MadeChange |= foldShuffleOfCastops(I);
MadeChange |= foldShuffleOfShuffles(I);
MadeChange |= foldShuffleOfIntrinsics(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 3bde1a59a4db0..36d2ccd54b13a 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -4,12 +4,28 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
-; CHECK-LABEL: define <4 x i16> @src_v2tov4_i16(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; SSE-NEXT: ret <4 x i16> [[RES]]
+;
+; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]]
+; AVX2-NEXT: ret <4 x i16> [[RES]]
+;
+; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x i16> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z
%select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x
@@ -18,12 +34,28 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1
}
define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16(
-; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v4tov8_i16(
+; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT: ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT: ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16(
+; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x i16> [[RES]]
;
%select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z
%select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x
@@ -32,12 +64,28 @@ define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i1
}
define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; CHECK-LABEL: define <8 x i16> @src_v8tov8_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i16> [[RES]]
+; SSE-LABEL: define <8 x i16> @src_v8tov8_i16(
+; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; SSE-NEXT: ret <8 x i16> [[RES]]
+;
+; AVX2-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]]
+; AVX2-NEXT: ret <8 x i16> [[RES]]
+;
+; AVX512-LABEL: define <8 x i16> @src_v8tov8_i16(
+; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX512-NEXT: ret <8 x i16> [[RES]]
;
%select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z
%select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x
@@ -47,7 +95,7 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16(
-; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -60,12 +108,28 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x
}
define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
-; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i32> [[RES]]
+; SSE-LABEL: define <4 x i32> @src_v2tov4_i32(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; SSE-NEXT: ret <4 x i32> [[RES]]
+;
+; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; AVX2-NEXT: ret <4 x i32> [[RES]]
+;
+; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
@@ -88,12 +152,28 @@ define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i3
}
define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
-; CHECK-LABEL: define <2 x i64> @src_v2tov2_i64(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: ret <2 x i64> [[RES]]
+; SSE-LABEL: define <2 x i64> @src_v2tov2_i64(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; SSE-NEXT: ret <2 x i64> [[RES]]
+;
+; AVX2-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; AVX2-NEXT: ret <2 x i64> [[RES]]
+;
+; AVX512-LABEL: define <2 x i64> @src_v2tov2_i64(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT: ret <2 x i64> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z
%select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x
@@ -116,12 +196,28 @@ define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
}
define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -144,12 +240,28 @@ define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <
}
define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: define <2 x double> @src_v2tov2_double(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; SSE-LABEL: define <2 x double> @src_v2tov2_double(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; SSE-NEXT: ret <2 x double> [[RES]]
+;
+; AVX2-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; AVX2-NEXT: ret <2 x double> [[RES]]
+;
+; AVX512-LABEL: define <2 x double> @src_v2tov2_double(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT: ret <2 x double> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z
%select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x
@@ -173,12 +285,28 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
; FMF Flags
define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -187,12 +315,28 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
}
define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -201,12 +345,28 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
}
define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -215,12 +375,28 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
}
define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -229,12 +405,28 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
}
define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -243,12 +435,28 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
}
define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -257,12 +465,28 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
}
define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
-; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
-; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
-; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: ret <4 x float> [[RES]]
+;
+; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: ret <4 x float> [[RES]]
+;
+; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
+; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]]
+; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]]
+; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; AVX512-NEXT: ret <4 x float> [[RES]]
;
%select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z
%select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x
@@ -313,7 +537,3 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
ret <4 x i32> %res
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX2: {{.*}}
-; AVX512: {{.*}}
-; SSE: {{.*}}
>From 4a551ed4e94a4c4b6b9a287d50efdee2f3571af2 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:26:22 +0900
Subject: [PATCH 3/4] Preserve Fast-Math Flags
---
.../Transforms/Vectorize/VectorCombine.cpp | 8 ++++++
.../VectorCombine/X86/shuffle-of-selects.ll | 28 +++++++++----------
2 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5e4b4b5a7e4b2..0df6f4101e1f4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1918,6 +1918,12 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
if (!C1VecTy || !C2VecTy)
return false;
+ auto *Select0 = cast<Instruction>(I.getOperand(0));
+ if (auto *SI0FOp = dyn_cast<FPMathOperator>(Select0))
+ if (auto *SI1FOp = dyn_cast<FPMathOperator>((I.getOperand(1))))
+ if (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())
+ return false;
+
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
auto SelOp = Instruction::Select;
InstructionCost OldCost = TTI.getCmpSelInstrCost(
@@ -1945,6 +1951,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
Value *NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+ if (isa<FPMathOperator>(NewSel))
+ cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
replaceValue(I, *NewSel);
return true;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
index 36d2ccd54b13a..91f7fdd601cdf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -290,7 +290,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -298,7 +298,7 @@ define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float>
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan(
@@ -320,7 +320,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -328,7 +328,7 @@ define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float>
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf(
@@ -350,7 +350,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -358,7 +358,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x fl
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf(
@@ -380,7 +380,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -388,7 +388,7 @@ define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz(
@@ -410,7 +410,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -418,7 +418,7 @@ define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz(
@@ -440,7 +440,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -448,7 +448,7 @@ define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x flo
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz(
@@ -470,7 +470,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; SSE-NEXT: [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; SSE-NEXT: ret <4 x float> [[RES]]
;
; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
@@ -478,7 +478,7 @@ define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
+; AVX2-NEXT: [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]
; AVX2-NEXT: ret <4 x float> [[RES]]
;
; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(
>From 17a6939bf4c5be34900af34655867e7aa0b9e0f4 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 23 Feb 2025 05:28:44 +0900
Subject: [PATCH 4/4] NewSel the transformed result append to Worklist
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0df6f4101e1f4..a0f77e3c2ba93 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1954,6 +1954,7 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
if (isa<FPMathOperator>(NewSel))
cast<Instruction>(NewSel)->setFastMathFlags(Select0->getFastMathFlags());
+ Worklist.pushValue(NewSel);
replaceValue(I, *NewSel);
return true;
}
More information about the llvm-commits
mailing list