[llvm] r250160 - [InstCombine][SSE4A] Remove broken INSERTQI range combining optimization

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 13 07:48:55 PDT 2015


Author: rksimon
Date: Tue Oct 13 09:48:54 2015
New Revision: 250160

URL: http://llvm.org/viewvc/llvm-project?rev=250160&view=rev
Log:
[InstCombine][SSE4A] Remove broken INSERTQI range combining optimization

As discussed in D13348, the INSERTQI range-combining code is wrong: it confuses the insertion bit index with an extraction bit index.

The remaining legal combines are very unlikely to occur (especially once we've converted to shuffles in D13348), so I'm removing the optimization.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=250160&r1=250159&r2=250160&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Oct 13 09:48:54 2015
@@ -1059,6 +1059,7 @@ Instruction *InstCombiner::visitCallInst
     if (auto CILength = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
       if (auto CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
         unsigned Index = CIIndex->getZExtValue();
+
         // From AMD documentation: "a value of zero in the field length is
         // defined as length of 64".
         unsigned Length = CILength->equalsInt(0) ? 64 : CILength->getZExtValue();
@@ -1077,54 +1078,12 @@ Instruction *InstCombiner::visitCallInst
         if (Length == 64 && Index == 0) {
           Value *Vec = II->getArgOperand(1);
           Value *Undef = UndefValue::get(Vec->getType());
-          const uint32_t Mask[] = { 0, 2 };
+          const uint32_t Mask[] = {0, 2};
           return ReplaceInstUsesWith(
               CI,
               Builder->CreateShuffleVector(
                   Vec, Undef, ConstantDataVector::get(
                                   II->getContext(), makeArrayRef(Mask))));
-        } else if (auto Source =
-                       dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
-          if (Source->hasOneUse() &&
-              Source->getArgOperand(1) == II->getArgOperand(1)) {
-            // If the source of the insert has only one use and it's another
-            // insert (and they're both inserting from the same vector), try to
-            // bundle both together.
-            auto CISourceLength =
-                dyn_cast<ConstantInt>(Source->getArgOperand(2));
-            auto CISourceIndex =
-                dyn_cast<ConstantInt>(Source->getArgOperand(3));
-            if (CISourceIndex && CISourceLength) {
-              unsigned SourceIndex = CISourceIndex->getZExtValue();
-              unsigned SourceLength = CISourceLength->getZExtValue();
-              unsigned SourceEnd = SourceIndex + SourceLength;
-              unsigned NewIndex, NewLength;
-              bool ShouldReplace = false;
-              if (Index <= SourceIndex && SourceIndex <= End) {
-                NewIndex = Index;
-                NewLength = std::max(End, SourceEnd) - NewIndex;
-                ShouldReplace = true;
-              } else if (SourceIndex <= Index && Index <= SourceEnd) {
-                NewIndex = SourceIndex;
-                NewLength = std::max(SourceEnd, End) - NewIndex;
-                ShouldReplace = true;
-              }
-
-              if (ShouldReplace) {
-                Constant *ConstantLength = ConstantInt::get(
-                    II->getArgOperand(2)->getType(), NewLength, false);
-                Constant *ConstantIndex = ConstantInt::get(
-                    II->getArgOperand(3)->getType(), NewIndex, false);
-                Value *Args[4] = { Source->getArgOperand(0),
-                                   II->getArgOperand(1), ConstantLength,
-                                   ConstantIndex };
-                Module *M = CI.getParent()->getParent()->getParent();
-                Value *F =
-                    Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
-                return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
-              }
-            }
-          }
         }
       }
     }
@@ -1220,9 +1179,9 @@ Instruction *InstCombiner::visitCallInst
         // control mask is set, then zero is written in the result byte.
         // The zero vector is in the right-hand side of the resulting
         // shufflevector.
- 
+
         // The value of each index is the least significant 4 bits of the
-        // shuffle control byte.      
+        // shuffle control byte.
         Indexes[I] = (Index < 0) ? NumElts : Index & 0xF;
       }
     } else if (!isa<ConstantAggregateZero>(V))

Modified: llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll?rev=250160&r1=250159&r2=250160&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll Tue Oct 13 09:48:54 2015
@@ -1,15 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; We should optimize these two redundant insertqi into one
-define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertTwice
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
-  ret <2 x i64> %2
-}
-
 ; The result of this insert is the second arg, since the top 64 bits of
 ; the result are undefined, and we copy the bottom 64 bits from the
 ; second arg
@@ -20,81 +10,6 @@ define <2 x i64> @testInsert64Bits(<2 x
   ret <2 x i64> %1
 }
 
-; Test the several types of ranges and ordering that exist for two insertqi
-define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertContainedRange
-; CHECK: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertContainedRange_2
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertOverlappingRange
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertOverlappingRange_2
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertAdjacentRange
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertAdjacentRange_2
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK-NEXT: ret <2 x i64> %1
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertDisjointRange
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK-NEXT: %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
-; CHECK-NEXT: ret <2 x i64> %2
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsertDisjointRange_2
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK-NEXT: %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
-; CHECK-NEXT: ret <2 x i64> %2
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
-  ret <2 x i64> %2
-}
-
 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
 ; CHECK-LABEL: @testZeroLength
 ; CHECK-NEXT: ret <2 x i64> %i




More information about the llvm-commits mailing list