[llvm] a3f50fb - [X86] isVectorShiftByScalarCheap - vXi8 select(shift(x,splat0),shift(x,splat1)) is better than shift(x,select(splat0,splat1))

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 23 06:30:13 PST 2021


Author: Simon Pilgrim
Date: 2021-12-23T14:30:02Z
New Revision: a3f50fb06dd467e54a62b371117eeecf88c78480

URL: https://github.com/llvm/llvm-project/commit/a3f50fb06dd467e54a62b371117eeecf88c78480
DIFF: https://github.com/llvm/llvm-project/commit/a3f50fb06dd467e54a62b371117eeecf88c78480.diff

LOG: [X86] isVectorShiftByScalarCheap - vXi8 select(shift(x,splat0),shift(x,splat1)) is better than shift(x,select(splat0,splat1))

Even though we don't have vXi8 vector shifts (apart from XOP), it is still better to prefer shift (or funnel-shift/rotate) by scalar where possible.

https://llvm.godbolt.org/z/6ss6ffTxv

Differential Revision: https://reviews.llvm.org/D116191

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
    llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
    llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
    llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3d186857e00ac..ec152d02fd1f4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32994,11 +32994,6 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
 bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
   unsigned Bits = Ty->getScalarSizeInBits();
 
-  // 8-bit shifts are always expensive, but versions with a scalar amount aren't
-  // particularly cheaper than those without.
-  if (Bits == 8)
-    return false;
-
   // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts.
   // Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred.
   if (Subtarget.hasXOP() &&

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
index 1c9617ab0d448..576c945eec3c0 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP
 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
 
 define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
@@ -37,7 +37,6 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x
 ; XOP-NEXT:    [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
 ; XOP-NEXT:    ret <4 x i32> [[SH]]
 ;
-
   %splat1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
   %splat2 = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> zeroinitializer
   %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2
@@ -78,7 +77,6 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
 ; XOP-NEXT:    [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
 ; XOP-NEXT:    ret <16 x i16> [[SH]]
 ;
-
   %splat1 = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> zeroinitializer
   %splat2 = shufflevector <16 x i16> %y, <16 x i16> poison, <16 x i32> zeroinitializer
   %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2
@@ -87,14 +85,40 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
 }
 
 define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
-; ALL-LABEL: @vector_variable_shift_right_v32i8(
-; ALL-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
-; ALL-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
-; ALL-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
-; ALL-NEXT:    [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
-; ALL-NEXT:    ret <32 x i8> [[SH]]
+; AVX1-LABEL: @vector_variable_shift_right_v32i8(
+; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX1-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX1-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX1-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX1-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; AVX2-LABEL: @vector_variable_shift_right_v32i8(
+; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX2-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX2-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX2-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX2-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; AVX512BW-LABEL: @vector_variable_shift_right_v32i8(
+; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX512BW-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX512BW-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX512BW-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX512BW-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; XOP-LABEL: @vector_variable_shift_right_v32i8(
+; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; XOP-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; XOP-NEXT:    [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
+; XOP-NEXT:    ret <32 x i8> [[SH]]
 ;
-
   %splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer
   %splat2 = shufflevector <32 x i8> %y, <32 x i8> poison, <32 x i32> zeroinitializer
   %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2
@@ -233,7 +257,6 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; XOP:       exit:
 ; XOP-NEXT:    ret void
 ;
-
 entry:
   %cmp16 = icmp sgt i32 %count, 0
   %wide.trip.count = zext i32 %count to i64
@@ -377,7 +400,6 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) {
 ; XOP:       exit:
 ; XOP-NEXT:    ret void
 ;
-
 entry:
   %i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0
   %s0 = shufflevector <8 x i32> %i0, <8 x i32> poison, <8 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
index 7d685b78f310c..d0ceec32b90ca 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
-; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP
+; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP
 ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
 
 define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
@@ -85,12 +85,39 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16
 }
 
 define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
-; ALL-LABEL: @vector_variable_shift_right_v32i8(
-; ALL-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; ALL-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; ALL-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
-; ALL-NEXT:    [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
-; ALL-NEXT:    ret <32 x i8> [[SH]]
+; AVX1-LABEL: @vector_variable_shift_right_v32i8(
+; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX1-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX1-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX1-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX1-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; AVX2-LABEL: @vector_variable_shift_right_v32i8(
+; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX2-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX2-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX2-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX2-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; AVX512BW-LABEL: @vector_variable_shift_right_v32i8(
+; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; AVX512BW-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
+; AVX512BW-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
+; AVX512BW-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
+; AVX512BW-NEXT:    ret <32 x i8> [[TMP3]]
+;
+; XOP-LABEL: @vector_variable_shift_right_v32i8(
+; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
+; XOP-NEXT:    [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
+; XOP-NEXT:    ret <32 x i8> [[SH]]
 ;
   %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer
   %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
index be3723b7d5545..7fd50c4d80d6d 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll
@@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.9.0"
 
 define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
-; CHECK-LABEL: @test_8bit(
-; CHECK-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
-; CHECK:       if_true:
-; CHECK-NEXT:    ret <16 x i8> [[MASK]]
-; CHECK:       if_false:
-; CHECK-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
-; CHECK-NEXT:    ret <16 x i8> [[RES]]
+; CHECK-SSE2-LABEL: @test_8bit(
+; CHECK-SSE2-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2:       if_true:
+; CHECK-SSE2-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-SSE2:       if_false:
+; CHECK-SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT:    ret <16 x i8> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_8bit(
+; CHECK-XOP-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-XOP-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP:       if_true:
+; CHECK-XOP-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-XOP:       if_false:
+; CHECK-XOP-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT:    ret <16 x i8> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_8bit(
+; CHECK-AVX-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX:       if_true:
+; CHECK-AVX-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-AVX:       if_false:
+; CHECK-AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX-NEXT:    ret <16 x i8> [[RES]]
 ;
   %mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer
   br i1 %tst, label %if_true, label %if_false

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
index a443e71000627..97e00023aa025 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
@@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.9.0"
 
 define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
-; CHECK-LABEL: @test_8bit(
-; CHECK-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
-; CHECK:       if_true:
-; CHECK-NEXT:    ret <16 x i8> [[MASK]]
-; CHECK:       if_false:
-; CHECK-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
-; CHECK-NEXT:    ret <16 x i8> [[RES]]
+; CHECK-SSE2-LABEL: @test_8bit(
+; CHECK-SSE2-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2:       if_true:
+; CHECK-SSE2-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-SSE2:       if_false:
+; CHECK-SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT:    ret <16 x i8> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_8bit(
+; CHECK-XOP-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-XOP-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP:       if_true:
+; CHECK-XOP-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-XOP:       if_false:
+; CHECK-XOP-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT:    ret <16 x i8> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_8bit(
+; CHECK-AVX-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX:       if_true:
+; CHECK-AVX-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK-AVX:       if_false:
+; CHECK-AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-AVX-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX-NEXT:    ret <16 x i8> [[RES]]
 ;
   %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
   br i1 %tst, label %if_true, label %if_false


        


More information about the llvm-commits mailing list