[llvm-commits] [llvm] r167750 - in /llvm/trunk: lib/Transforms/Vectorize/BBVectorize.cpp test/Transforms/BBVectorize/X86/sh-rec.ll

Hal Finkel hfinkel at anl.gov
Mon Nov 12 13:21:02 PST 2012


Author: hfinkel
Date: Mon Nov 12 15:21:02 2012
New Revision: 167750

URL: http://llvm.org/viewvc/llvm-project?rev=167750&view=rev
Log:
BBVectorize: Use a more sophisticated check for input cost

The old checking code, which assumed that input shuffles and insert-elements
could always be folded (and thus were free), is too simple.
Such folding can only happen in special circumstances.
Using the simple check caused infinite recursion.

Added:
    llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=167750&r1=167749&r2=167750&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Mon Nov 12 15:21:02 2012
@@ -28,6 +28,7 @@
 #include "llvm/Type.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -400,6 +401,7 @@
         DEBUG(dbgs() << "BBV: fusing loop #" << n <<
               " for " << BB.getName() << " in " <<
               BB.getParent()->getName() << "...\n");
+assert(n < 10 && "hrmm, really?");
         if (vectorizePairs(BB))
           changed = true;
         else
@@ -1765,9 +1767,12 @@
             bool NeedsExtraction = false;
             for (Value::use_iterator I = S->first->use_begin(),
                  IE = S->first->use_end(); I != IE; ++I) {
-              if (isa<ShuffleVectorInst>(*I) ||
-                  isa<InsertElementInst>(*I) ||
-                  isa<ExtractElementInst>(*I))
+              if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+                // Shuffle can be folded if it has no other input
+                if (isa<UndefValue>(SI->getOperand(1)))
+                  continue;
+              }
+              if (isa<ExtractElementInst>(*I))
                 continue;
               if (PrunedTreeInstrs.count(*I))
                 continue;
@@ -1792,9 +1797,12 @@
             NeedsExtraction = false;
             for (Value::use_iterator I = S->second->use_begin(),
                  IE = S->second->use_end(); I != IE; ++I) {
-              if (isa<ShuffleVectorInst>(*I) ||
-                  isa<InsertElementInst>(*I) ||
-                  isa<ExtractElementInst>(*I))
+              if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+                // Shuffle can be folded if it has no other input
+                if (isa<UndefValue>(SI->getOperand(1)))
+                  continue;
+              }
+              if (isa<ExtractElementInst>(*I))
                 continue;
               if (PrunedTreeInstrs.count(*I))
                 continue;
@@ -1844,14 +1852,35 @@
 
               // Combining vector operations of the same type is also assumed
               // folded with other operations.
-              if (Ty1 == Ty2 &&
-                  (isa<ShuffleVectorInst>(O1) ||
-                   isa<InsertElementInst>(O1) ||
-                   isa<InsertElementInst>(O1)) &&
-                  (isa<ShuffleVectorInst>(O2) ||
-                   isa<InsertElementInst>(O2) ||
-                   isa<InsertElementInst>(O2)))
-                continue;
+              if (Ty1 == Ty2) {
+                // If both are insert elements, then both can be widened.
+                if (isa<InsertElementInst>(O1) && isa<InsertElementInst>(O2))
+                  continue;
+                // If both are extract elements, and both have the same input
+                // type, then they can be replaced with a shuffle
+                ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
+                                   *EIO2 = dyn_cast<ExtractElementInst>(O2);
+                if (EIO1 && EIO2 &&
+                    EIO1->getOperand(0)->getType() ==
+                      EIO2->getOperand(0)->getType())
+                  continue;
+                // If both are a shuffle with equal operand types and only two
+                // unique operands, then they can be replaced with a single
+                // shuffle
+                ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
+                                  *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
+                if (SIO1 && SIO2 &&
+                    SIO1->getOperand(0)->getType() ==
+                      SIO2->getOperand(0)->getType()) {
+                  SmallSet<Value *, 4> SIOps;
+                  SIOps.insert(SIO1->getOperand(0));
+                  SIOps.insert(SIO1->getOperand(1));
+                  SIOps.insert(SIO2->getOperand(0));
+                  SIOps.insert(SIO2->getOperand(1));
+                  if (SIOps.size() <= 2)
+                    continue;
+                }
+              }
 
               int ESContrib;
               // This pair has already been formed.

Added: llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll?rev=167750&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll (added)
+++ llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll Mon Nov 12 15:21:02 2012
@@ -0,0 +1,54 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define void @ptoa() nounwind uwtable {
+entry:
+  %call = call i8* @malloc() nounwind
+  br i1 undef, label %return, label %if.end10
+
+if.end10:                                         ; preds = %entry
+  %incdec.ptr = getelementptr inbounds i8* %call, i64 undef
+  %call17 = call i32 @ptou() nounwind
+  %incdec.ptr26.1 = getelementptr inbounds i8* %incdec.ptr, i64 -2
+  store i8 undef, i8* %incdec.ptr26.1, align 1
+  %div27.1 = udiv i32 %call17, 100
+  %rem.2 = urem i32 %div27.1, 10
+  %add2230.2 = or i32 %rem.2, 48
+  %conv25.2 = trunc i32 %add2230.2 to i8
+  %incdec.ptr26.2 = getelementptr inbounds i8* %incdec.ptr, i64 -3
+  store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
+  %incdec.ptr26.3 = getelementptr inbounds i8* %incdec.ptr, i64 -4
+  store i8 undef, i8* %incdec.ptr26.3, align 1
+  %div27.3 = udiv i32 %call17, 10000
+  %rem.4 = urem i32 %div27.3, 10
+  %add2230.4 = or i32 %rem.4, 48
+  %conv25.4 = trunc i32 %add2230.4 to i8
+  %incdec.ptr26.4 = getelementptr inbounds i8* %incdec.ptr, i64 -5
+  store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
+  %div27.4 = udiv i32 %call17, 100000
+  %rem.5 = urem i32 %div27.4, 10
+  %add2230.5 = or i32 %rem.5, 48
+  %conv25.5 = trunc i32 %add2230.5 to i8
+  %incdec.ptr26.5 = getelementptr inbounds i8* %incdec.ptr, i64 -6
+  store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
+  %incdec.ptr26.6 = getelementptr inbounds i8* %incdec.ptr, i64 -7
+  store i8 0, i8* %incdec.ptr26.6, align 1
+  %incdec.ptr26.7 = getelementptr inbounds i8* %incdec.ptr, i64 -8
+  store i8 undef, i8* %incdec.ptr26.7, align 1
+  %div27.7 = udiv i32 %call17, 100000000
+  %rem.8 = urem i32 %div27.7, 10
+  %add2230.8 = or i32 %rem.8, 48
+  %conv25.8 = trunc i32 %add2230.8 to i8
+  %incdec.ptr26.8 = getelementptr inbounds i8* %incdec.ptr, i64 -9
+  store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
+  unreachable
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: @ptoa
+}
+
+declare noalias i8* @malloc() nounwind
+
+declare i32 @ptou()





More information about the llvm-commits mailing list