[llvm-commits] [llvm] r167784 - in /llvm/trunk: lib/Transforms/Vectorize/BBVectorize.cpp test/Transforms/BBVectorize/X86/sh-rec2.ll

Hal Finkel hfinkel at anl.gov
Mon Nov 12 15:55:36 PST 2012


Author: hfinkel
Date: Mon Nov 12 17:55:36 2012
New Revision: 167784

URL: http://llvm.org/viewvc/llvm-project?rev=167784&view=rev
Log:
BBVectorize: Only some insert element operand pairs are free.

This fixes another infinite recursion case when using target costs.
We can only replace insert-element input chains that are pure (i.e.,
chains that end by inserting into an undef value).
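
For reference, a minimal sketch of what "pure" means here (the values
%a, %b, and %w are hypothetical and not taken from the commit): a chain
is pure when walking operand 0 of each insertelement bottoms out at an
undef value, with nothing but insertelements in between.

  ; Pure chain: operand 0 leads back to undef, so isPureIEChain is true.
  %v0 = insertelement <4 x float> undef, float %a, i32 0
  %v1 = insertelement <4 x float> %v0, float %b, i32 1

  ; Not pure: the chain starts from a pre-existing vector %w, so
  ; isPureIEChain is false and the pair is not treated as free.
  %u0 = insertelement <4 x float> %w, float %a, i32 0
  %u1 = insertelement <4 x float> %u0, float %b, i32 1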

Added:
    llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=167784&r1=167783&r2=167784&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Mon Nov 12 17:55:36 2012
@@ -677,6 +677,19 @@
 
       return false;
     }
+
+    bool isPureIEChain(InsertElementInst *IE) {
+      InsertElementInst *IENext = IE;
+      do {
+        if (!isa<UndefValue>(IENext->getOperand(0)) &&
+            !isa<InsertElementInst>(IENext->getOperand(0))) {
+          return false;
+        }
+      } while ((IENext =
+                 dyn_cast<InsertElementInst>(IENext->getOperand(0))));
+
+      return true;
+    }
   };
 
   // This function implements one vectorization iteration on the provided
@@ -1854,7 +1867,9 @@
               // folded with other operations.
               if (Ty1 == Ty2) {
                 // If both are insert elements, then both can be widened.
-                if (isa<InsertElementInst>(O1) && isa<InsertElementInst>(O2))
+                InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
+                                  *IEO2 = dyn_cast<InsertElementInst>(O2);
+                if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
                   continue;
                 // If both are extract elements, and both have the same input
                 // type, then they can be replaced with a shuffle
@@ -2126,18 +2141,7 @@
     if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
       // If we have a pure insertelement chain, then this can be rewritten
       // into a chain that directly builds the larger type.
-      bool PureChain = true;
-      InsertElementInst *LIENext = LIE;
-      do {
-        if (!isa<UndefValue>(LIENext->getOperand(0)) &&
-            !isa<InsertElementInst>(LIENext->getOperand(0))) {
-          PureChain = false;
-          break;
-        }
-      } while ((LIENext =
-                 dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
-      if (PureChain) {
+      if (isPureIEChain(LIE)) {
         SmallVector<Value *, 8> VectElemts(numElemL,
           UndefValue::get(ArgTypeL->getScalarType()));
         InsertElementInst *LIENext = LIE;
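
As an illustration of the rewrite that the check above guards (a sketch
with hypothetical values %a through %d, not taken from the commit): when
both operands of a fused pair are pure chains, the two narrow chains can
be rebuilt as a single chain that directly builds the larger type.

  ; Two pure <2 x float> chains feeding the pair being fused:
  %lo0 = insertelement <2 x float> undef, float %a, i32 0
  %lo1 = insertelement <2 x float> %lo0, float %b, i32 1
  %hi0 = insertelement <2 x float> undef, float %c, i32 0
  %hi1 = insertelement <2 x float> %hi0, float %d, i32 1

  ; Rewritten to build the wider operand directly:
  %w0 = insertelement <4 x float> undef, float %a, i32 0
  %w1 = insertelement <4 x float> %w0, float %b, i32 1
  %w2 = insertelement <4 x float> %w1, float %c, i32 2
  %w3 = insertelement <4 x float> %w2, float %d, i32 3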

Added: llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll?rev=167784&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll (added)
+++ llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll Mon Nov 12 17:55:36 2012
@@ -0,0 +1,85 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
+
+define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
+entry:
+  %xmc = alloca [52 x i16], align 16
+  %arraydecay5 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 0
+  call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
+  %incdec.ptr136 = getelementptr inbounds i8* %c, i64 10
+  %incdec.ptr157 = getelementptr inbounds i8* %c, i64 11
+  store i8 0, i8* %incdec.ptr136, align 1
+  %arrayidx162 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 11
+  %0 = load i16* %arrayidx162, align 2
+  %conv1631 = trunc i16 %0 to i8
+  %and164 = shl i8 %conv1631, 3
+  %shl165 = and i8 %and164, 56
+  %incdec.ptr172 = getelementptr inbounds i8* %c, i64 12
+  store i8 %shl165, i8* %incdec.ptr157, align 1
+  %1 = load i16* inttoptr (i64 2 to i16*), align 2
+  %conv1742 = trunc i16 %1 to i8
+  %and175 = shl i8 %conv1742, 1
+  %incdec.ptr183 = getelementptr inbounds i8* %c, i64 13
+  store i8 %and175, i8* %incdec.ptr172, align 1
+  %incdec.ptr199 = getelementptr inbounds i8* %c, i64 14
+  store i8 0, i8* %incdec.ptr183, align 1
+  %arrayidx214 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 15
+  %incdec.ptr220 = getelementptr inbounds i8* %c, i64 15
+  store i8 0, i8* %incdec.ptr199, align 1
+  %2 = load i16* %arrayidx214, align 2
+  %conv2223 = trunc i16 %2 to i8
+  %and223 = shl i8 %conv2223, 6
+  %incdec.ptr235 = getelementptr inbounds i8* %c, i64 16
+  store i8 %and223, i8* %incdec.ptr220, align 1
+  %arrayidx240 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 19
+  %3 = load i16* %arrayidx240, align 2
+  %conv2414 = trunc i16 %3 to i8
+  %and242 = shl i8 %conv2414, 2
+  %shl243 = and i8 %and242, 28
+  %incdec.ptr251 = getelementptr inbounds i8* %c, i64 17
+  store i8 %shl243, i8* %incdec.ptr235, align 1
+  %incdec.ptr272 = getelementptr inbounds i8* %c, i64 18
+  store i8 0, i8* %incdec.ptr251, align 1
+  %arrayidx282 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 25
+  %4 = load i16* %arrayidx282, align 2
+  %conv2835 = trunc i16 %4 to i8
+  %and284 = and i8 %conv2835, 7
+  %incdec.ptr287 = getelementptr inbounds i8* %c, i64 19
+  store i8 %and284, i8* %incdec.ptr272, align 1
+  %incdec.ptr298 = getelementptr inbounds i8* %c, i64 20
+  store i8 0, i8* %incdec.ptr287, align 1
+  %incdec.ptr314 = getelementptr inbounds i8* %c, i64 21
+  store i8 0, i8* %incdec.ptr298, align 1
+  %arrayidx319 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 26
+  %5 = load i16* %arrayidx319, align 4
+  %conv3206 = trunc i16 %5 to i8
+  %and321 = shl i8 %conv3206, 4
+  %shl322 = and i8 %and321, 112
+  %incdec.ptr335 = getelementptr inbounds i8* %c, i64 22
+  store i8 %shl322, i8* %incdec.ptr314, align 1
+  %arrayidx340 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 29
+  %6 = load i16* %arrayidx340, align 2
+  %conv3417 = trunc i16 %6 to i8
+  %and342 = shl i8 %conv3417, 3
+  %shl343 = and i8 %and342, 56
+  %incdec.ptr350 = getelementptr inbounds i8* %c, i64 23
+  store i8 %shl343, i8* %incdec.ptr335, align 1
+  %incdec.ptr366 = getelementptr inbounds i8* %c, i64 24
+  store i8 0, i8* %incdec.ptr350, align 1
+  %arrayidx381 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 36
+  %incdec.ptr387 = getelementptr inbounds i8* %c, i64 25
+  store i8 0, i8* %incdec.ptr366, align 1
+  %7 = load i16* %arrayidx381, align 8
+  %conv3898 = trunc i16 %7 to i8
+  %and390 = shl i8 %conv3898, 6
+  store i8 %and390, i8* %incdec.ptr387, align 1
+  unreachable
+; CHECK: @gsm_encode
+}
+
+declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
+
+declare void @llvm.trap() noreturn nounwind
