[llvm] a3f50fb - [X86] isVectorShiftByScalarCheap - vXi8 select(shift(x,splat0),shift(x,splat1)) is better than shift(x,select(splat0,splat1))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 23 06:30:13 PST 2021
Author: Simon Pilgrim
Date: 2021-12-23T14:30:02Z
New Revision: a3f50fb06dd467e54a62b371117eeecf88c78480
URL: https://github.com/llvm/llvm-project/commit/a3f50fb06dd467e54a62b371117eeecf88c78480
DIFF: https://github.com/llvm/llvm-project/commit/a3f50fb06dd467e54a62b371117eeecf88c78480.diff
LOG: [X86] isVectorShiftByScalarCheap - vXi8 select(shift(x,splat0),shift(x,splat1)) is better than shift(x,select(splat0,splat1))
Even though X86 has no native vXi8 vector shift instructions (apart from XOP), it is still better to prefer a shift (or funnel-shift/rotate) by a splatted scalar amount where possible.
https://llvm.godbolt.org/z/6ss6ffTxv
Differential Revision: https://reviews.llvm.org/D116191
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3d186857e00ac..ec152d02fd1f4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32994,11 +32994,6 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
unsigned Bits = Ty->getScalarSizeInBits();
- // 8-bit shifts are always expensive, but versions with a scalar amount aren't
- // particularly cheaper than those without.
- if (Bits == 8)
- return false;
-
// XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts.
// Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred.
if (Subtarget.hasXOP() &&
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
index 1c9617ab0d448..576c945eec3c0 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
@@ -37,7 +37,6 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x
; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; XOP-NEXT: ret <4 x i32> [[SH]]
;
-
%splat1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
%splat2 = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> zeroinitializer
%sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2
@@ -78,7 +77,6 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
; XOP-NEXT: ret <16 x i16> [[SH]]
;
-
%splat1 = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> zeroinitializer
%splat2 = shufflevector <16 x i16> %y, <16 x i16> poison, <16 x i32> zeroinitializer
%sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2
@@ -87,14 +85,40 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
}
define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
-; ALL-LABEL: @vector_variable_shift_right_v32i8(
-; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
-; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
-; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
-; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
-; ALL-NEXT: ret <32 x i8> [[SH]]
+; AVX1-LABEL: @vector_variable_shift_right_v32i8(
+; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX1-NEXT: ret <32 x i8> [[TMP3]]
+;
+; AVX2-LABEL: @vector_variable_shift_right_v32i8(
+; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX2-NEXT: ret <32 x i8> [[TMP3]]
+;
+; AVX512BW-LABEL: @vector_variable_shift_right_v32i8(
+; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX512BW-NEXT: ret <32 x i8> [[TMP3]]
+;
+; XOP-LABEL: @vector_variable_shift_right_v32i8(
+; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
+; XOP-NEXT: ret <32 x i8> [[SH]]
;
-
%splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer
%splat2 = shufflevector <32 x i8> %y, <32 x i8> poison, <32 x i32> zeroinitializer
%sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2
@@ -233,7 +257,6 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
; XOP: exit:
; XOP-NEXT: ret void
;
-
entry:
%cmp16 = icmp sgt i32 %count, 0
%wide.trip.count = zext i32 %count to i64
@@ -377,7 +400,6 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) {
; XOP: exit:
; XOP-NEXT: ret void
;
-
entry:
%i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0
%s0 = shufflevector <8 x i32> %i0, <8 x i32> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
index 7d685b78f310c..d0ceec32b90ca 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP
; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
@@ -85,12 +85,39 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
}
define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
-; ALL-LABEL: @vector_variable_shift_right_v32i8(
-; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
-; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
-; ALL-NEXT: ret <32 x i8> [[SH]]
+; AVX1-LABEL: @vector_variable_shift_right_v32i8(
+; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX1-NEXT: ret <32 x i8> [[TMP3]]
+;
+; AVX2-LABEL: @vector_variable_shift_right_v32i8(
+; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX2-NEXT: ret <32 x i8> [[TMP3]]
+;
+; AVX512BW-LABEL: @vector_variable_shift_right_v32i8(
+; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX512BW-NEXT: ret <32 x i8> [[TMP3]]
+;
+; XOP-LABEL: @vector_variable_shift_right_v32i8(
+; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
+; XOP-NEXT: ret <32 x i8> [[SH]]
;
%splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer
%splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
index be3723b7d5545..7fd50c4d80d6d 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
@@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.9.0"
define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
-; CHECK-LABEL: @test_8bit(
-; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
-; CHECK: if_true:
-; CHECK-NEXT: ret <16 x i8> [[MASK]]
-; CHECK: if_false:
-; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
-; CHECK-NEXT: ret <16 x i8> [[RES]]
+; CHECK-SSE2-LABEL: @test_8bit(
+; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2: if_true:
+; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-SSE2: if_false:
+; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_8bit(
+; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP: if_true:
+; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-XOP: if_false:
+; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT: ret <16 x i8> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_8bit(
+; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX: if_true:
+; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-AVX: if_false:
+; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX-NEXT: ret <16 x i8> [[RES]]
;
%mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
index a443e71000627..97e00023aa025 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
@@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.9.0"
define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
-; CHECK-LABEL: @test_8bit(
-; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
-; CHECK: if_true:
-; CHECK-NEXT: ret <16 x i8> [[MASK]]
-; CHECK: if_false:
-; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
-; CHECK-NEXT: ret <16 x i8> [[RES]]
+; CHECK-SSE2-LABEL: @test_8bit(
+; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2: if_true:
+; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-SSE2: if_false:
+; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_8bit(
+; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP: if_true:
+; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-XOP: if_false:
+; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT: ret <16 x i8> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_8bit(
+; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX: if_true:
+; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]]
+; CHECK-AVX: if_false:
+; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX-NEXT: ret <16 x i8> [[RES]]
;
%mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
br i1 %tst, label %if_true, label %if_false
More information about the llvm-commits
mailing list