[llvm] 178f471 - [CostModel][X86] getShuffleCost - shuffles with only one defined element are always cheap (#124412)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 07:56:25 PST 2025


Author: Simon Pilgrim
Date: 2025-01-27T15:56:22Z
New Revision: 178f47143a3b3c547df6d1f07e9707792f5d9fd4

URL: https://github.com/llvm/llvm-project/commit/178f47143a3b3c547df6d1f07e9707792f5d9fd4
DIFF: https://github.com/llvm/llvm-project/commit/178f47143a3b3c547df6d1f07e9707792f5d9fd4.diff

LOG: [CostModel][X86] getShuffleCost - shuffles with only one defined element are always cheap (#124412)

If we're just moving a single element around inside a 128-bit lane (probably as an alternative to extracting it), we can assume this is cheap as a single PSRLDQ/PSHUFD/SHUFPS.

I've got the horrid feeling we're moving towards matching all SSE shuffle patterns inside the cost model, but I'm going to do my best to avoid this for now :|

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/reduction.ll
    llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
    llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
    llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
    llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
    llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index cdc2ce752743cc..a64d65cb770b0f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1565,19 +1565,25 @@ InstructionCost X86TTIImpl::getShuffleCost(
 
   // Attempt to detect a cheaper inlane shuffle, avoiding 128-bit subvector
   // permutation.
+  // Attempt to detect a shuffle mask with a single defined element.
   bool IsInLaneShuffle = false;
+  bool IsSingleElementMask = false;
   if (BaseTp->getPrimitiveSizeInBits() > 0 &&
       (BaseTp->getPrimitiveSizeInBits() % 128) == 0 &&
       BaseTp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
       Mask.size() == BaseTp->getElementCount().getKnownMinValue()) {
     unsigned NumLanes = BaseTp->getPrimitiveSizeInBits() / 128;
     unsigned NumEltsPerLane = Mask.size() / NumLanes;
-    if ((Mask.size() % NumLanes) == 0)
+    if ((Mask.size() % NumLanes) == 0) {
       IsInLaneShuffle = all_of(enumerate(Mask), [&](const auto &P) {
         return P.value() == PoisonMaskElem ||
                ((P.value() % Mask.size()) / NumEltsPerLane) ==
                    (P.index() / NumEltsPerLane);
       });
+      IsSingleElementMask = (Mask.size() - 1) == count_if(Mask, [](int M) {
+                              return M == PoisonMaskElem;
+                            });
+    }
   }
 
   // Treat <X x bfloat> shuffles as <X x half>.
@@ -1791,6 +1797,11 @@ InstructionCost X86TTIImpl::getShuffleCost(
     return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
   }
 
+  // If we're just moving a single element around (probably as an alternative to
+  // extracting it), we can assume this is cheap.
+  if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
+    return TTI::TCC_Basic;
+
   static const CostTblEntry AVX512VBMIShuffleTbl[] = {
       {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
       {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb

diff  --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll
index d7cf8e6cb8905b..5ff3920c638749 100644
--- a/llvm/test/Analysis/CostModel/X86/reduction.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduction.ll
@@ -638,7 +638,7 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
@@ -1133,7 +1133,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 4b1234fda0e18b..056d9d1fba1415 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -59,18 +59,15 @@ define <8 x i16> @add_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
 
 define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: @add_v8i16_u1234567(
-; SSE2-NEXT:    [[A2:%.*]] = extractelement <8 x i16> [[A:%.*]], i64 2
-; SSE2-NEXT:    [[A3:%.*]] = extractelement <8 x i16> [[A]], i64 3
-; SSE2-NEXT:    [[A4:%.*]] = extractelement <8 x i16> [[A]], i64 4
-; SSE2-NEXT:    [[A5:%.*]] = extractelement <8 x i16> [[A]], i64 5
-; SSE2-NEXT:    [[A6:%.*]] = extractelement <8 x i16> [[A]], i64 6
-; SSE2-NEXT:    [[A7:%.*]] = extractelement <8 x i16> [[A]], i64 7
-; SSE2-NEXT:    [[A23:%.*]] = add i16 [[A2]], [[A3]]
-; SSE2-NEXT:    [[A45:%.*]] = add i16 [[A4]], [[A5]]
-; SSE2-NEXT:    [[A67:%.*]] = add i16 [[A6]], [[A7]]
-; SSE2-NEXT:    [[HADD1:%.*]] = insertelement <8 x i16> poison, i16 [[A23]], i64 1
-; SSE2-NEXT:    [[HADD2:%.*]] = insertelement <8 x i16> [[HADD1]], i16 [[A45]], i64 2
-; SSE2-NEXT:    [[HADD3:%.*]] = insertelement <8 x i16> [[HADD2]], i16 [[A67]], i64 3
+; SSE2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[A]], [[SHIFT2]]
+; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
+; SSE2-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[A]], [[SHIFT3]]
+; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[HADD1:%.*]] = add <8 x i16> [[TMP7]], [[TMP4]]
+; SSE2-NEXT:    [[HADD2:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
index c9cba0a4cc0ff2..572ec9efafe1ae 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
@@ -59,18 +59,15 @@ define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
 
 define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: @sub_v8i16_u1234567(
-; SSE2-NEXT:    [[A2:%.*]] = extractelement <8 x i16> [[A:%.*]], i64 2
-; SSE2-NEXT:    [[A3:%.*]] = extractelement <8 x i16> [[A]], i64 3
-; SSE2-NEXT:    [[A4:%.*]] = extractelement <8 x i16> [[A]], i64 4
-; SSE2-NEXT:    [[A5:%.*]] = extractelement <8 x i16> [[A]], i64 5
-; SSE2-NEXT:    [[A6:%.*]] = extractelement <8 x i16> [[A]], i64 6
-; SSE2-NEXT:    [[A7:%.*]] = extractelement <8 x i16> [[A]], i64 7
-; SSE2-NEXT:    [[A23:%.*]] = sub i16 [[A2]], [[A3]]
-; SSE2-NEXT:    [[A45:%.*]] = sub i16 [[A4]], [[A5]]
-; SSE2-NEXT:    [[A67:%.*]] = sub i16 [[A6]], [[A7]]
-; SSE2-NEXT:    [[HSUB1:%.*]] = insertelement <8 x i16> poison, i16 [[A23]], i64 1
-; SSE2-NEXT:    [[HSUB2:%.*]] = insertelement <8 x i16> [[HSUB1]], i16 [[A45]], i64 2
-; SSE2-NEXT:    [[HSUB3:%.*]] = insertelement <8 x i16> [[HSUB2]], i16 [[A67]], i64 3
+; SSE2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[A]], [[SHIFT2]]
+; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
+; SSE2-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]]
+; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[HSUB1:%.*]] = sub <8 x i16> [[TMP7]], [[TMP4]]
+; SSE2-NEXT:    [[HSUB2:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE2-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index f3b7f7b72ee425..d369279c15db44 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -252,17 +252,11 @@ define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext0_ext1_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
-; SSE-NEXT:    [[R:%.*]] = add nuw i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext0_ext1_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext0_ext1_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 0
   %e1 = extractelement <16 x i8> %y, i32 1
@@ -271,17 +265,11 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext5_ext0_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
-; SSE-NEXT:    [[R:%.*]] = sub nsw i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext5_ext0_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext5_ext0_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 5
   %e1 = extractelement <16 x i8> %y, i32 0
@@ -290,17 +278,11 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext1_ext6_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
-; SSE-NEXT:    [[R:%.*]] = and i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext1_ext6_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext1_ext6_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 1
   %e1 = extractelement <16 x i8> %y, i32 6

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index c125b73fccddf9..7cbe1c6cec9062 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -252,17 +252,11 @@ define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext0_ext1_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
-; SSE-NEXT:    [[R:%.*]] = add nuw i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext0_ext1_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext0_ext1_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 0
   %e1 = extractelement <16 x i8> %y, i32 1
@@ -271,17 +265,11 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext5_ext0_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
-; SSE-NEXT:    [[R:%.*]] = sub nsw i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext5_ext0_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext5_ext0_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 5
   %e1 = extractelement <16 x i8> %y, i32 0
@@ -290,17 +278,11 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
 }
 
 define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
-; SSE-LABEL: @ext1_ext6_add(
-; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
-; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
-; SSE-NEXT:    [[R:%.*]] = and i8 [[E0]], [[E1]]
-; SSE-NEXT:    ret i8 [[R]]
-;
-; AVX-LABEL: @ext1_ext6_add(
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
-; AVX-NEXT:    ret i8 [[R]]
+; CHECK-LABEL: @ext1_ext6_add(
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 1
   %e1 = extractelement <16 x i8> %y, i32 6

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 2db1e21b3e95a7..f57583a3f53a64 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -265,16 +265,10 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
 define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync {
-; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
-; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
-; SSE2-NEXT:    [[S:%.*]] = load i16, ptr [[GEP]], align 2
-; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
-; SSE2-NEXT:    ret <8 x i16> [[R]]
-;
-; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT:    ret <8 x i16> [[R]]
+; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
   %s = load i16, ptr %gep, align 2
@@ -285,16 +279,10 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
 ; Verify that alignment of the new load is not over-specified.
 
 define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync {
-; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
-; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
-; SSE2-NEXT:    [[S:%.*]] = load i16, ptr [[GEP]], align 8
-; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
-; SSE2-NEXT:    ret <8 x i16> [[R]]
-;
-; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT:    ret <8 x i16> [[R]]
+; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
   %s = load i16, ptr %gep, align 8
@@ -603,17 +591,10 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
 define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync {
-; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
-; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
-; SSE2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
-; SSE2-NEXT:    [[S:%.*]] = load i16, ptr [[TMP1]], align 8
-; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
-; SSE2-NEXT:    ret <8 x i16> [[R]]
-;
-; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX2-NEXT:    ret <8 x i16> [[R]]
+; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
   %l = load <2 x i16>, ptr %gep, align 8


        


More information about the llvm-commits mailing list