[llvm-commits] [llvm] r103354 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineCasts.cpp test/Transforms/InstCombine/cast.ll

Sat May 8 14:50:26 PDT 2010

Author: lattner
Date: Sat May  8 16:50:26 2010
New Revision: 103354

URL: http://llvm.org/viewvc/llvm-project?rev=103354&view=rev
Log:
Teach instcombine to transform a bitcast/(zext|trunc)/bitcast sequence
with a vector input and output into a shuffle vector.  This sort of 
sequence happens when the input code stores with one type and reloads
with another type and then SROA promotes to i96 integers, which make
everyone sad.

This fixes rdar://7896024


Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/trunk/test/Transforms/InstCombine/cast.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=103354&r1=103353&r2=103354&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Sat May  8 16:50:26 2010
@@ -1252,6 +1252,64 @@
   return commonPointerCastTransforms(CI);
 }
 
+/// OptimizeVectorResize - InVal (which is known to have vector type) is being
+/// zero extended or truncated to the vector type DestTy.  Try to replace it
+/// with a shuffle (preceded by a vector/vector bitcast of InVal when the
+/// element types differ).  Returns the new shuffle instruction, or 0 if the
+/// element types differ in size.  IC's Builder is used to create the bitcast.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+                                         InstCombiner &IC) {
+  // The element types of the input and output may differ, but only when they
+  // have the same size in bits; in that case the input is first bitcast so
+  // that its element type matches the output's.  Otherwise we give up.
+  const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+  
+  if (SrcTy->getElementType() != DestTy->getElementType()) {
+    // The element types don't need to be identical, but for now they must be
+    // the same size.  There is no specific reason we couldn't handle things
+    // like <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't
+    // gotten there yet.
+    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+        DestTy->getElementType()->getPrimitiveSizeInBits())
+      return 0;
+    
+    SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+    InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+  }
+  
+  // Element types now match.  Pick the shuffle mask and second shuffle operand
+  // based on whether the element count shrinks or not.  NOTE(review): the
+  // low-element masks below look little-endian specific -- confirm for BE.
+  SmallVector<Constant*, 16> ShuffleMask;
+  Value *V2;
+  const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+  
+  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+    // Shrinking: the mask selects input elements 0..N-1, where N is the
+    // destination element count; the second shuffle input is unused (undef).
+    V2 = UndefValue::get(SrcTy);
+    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+    
+  } else {
+    // Growing (or same element count): shuffle in all of the elements from
+    // InVal and fill any remaining result elements with zeros taken from a
+    // constant zero vector used as the second shuffle input.
+    V2 = Constant::getNullValue(SrcTy);
+    unsigned SrcElts = SrcTy->getNumElements();
+    for (unsigned i = 0, e = SrcElts; i != e; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+    // Mask index SrcElts is the first element of the zero input (V2).
+    ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+                       ConstantInt::get(Int32Ty, SrcElts));
+  }
+  
+  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+  return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
@@ -1310,6 +1368,18 @@
                      Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
       // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
     }
+    
+    // If this is a cast from an integer to vector, check to see if the input
+    // is a trunc or zext of a bitcast from vector.  If so, we can replace all
+    // the casts with a shuffle and (potentially) a bitcast.
+    if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+      CastInst *SrcCast = cast<CastInst>(Src);
+      if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+        if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+                                               cast<VectorType>(DestTy), *this))
+            return I;
+    }
   }
 
   if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {

Modified: llvm/trunk/test/Transforms/InstCombine/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/cast.ll?rev=103354&r1=103353&r2=103354&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/cast.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/cast.ll Sat May  8 16:50:26 2010
@@ -605,3 +605,36 @@
 ; CHECK-NOT: i32
 ; CHECK:   ret i64 %H
 }
+
+define <3 x i32> @test60(<4 x i32> %call4) nounwind {  ; bitcast/trunc/bitcast, vector in and out
+  %tmp11 = bitcast <4 x i32> %call4 to i128  ; vector -> integer
+  %tmp9 = trunc i128 %tmp11 to i96  ; narrow the integer to 96 bits
+  %tmp10 = bitcast i96 %tmp9 to <3 x i32>  ; integer -> narrower vector
+  ret <3 x i32> %tmp10
+  
+; CHECK: @test60
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test61(<3 x i32> %call4) nounwind {  ; bitcast/zext/bitcast, vector in and out
+  %tmp11 = bitcast <3 x i32> %call4 to i96  ; vector -> integer
+  %tmp9 = zext i96 %tmp11 to i128  ; widen the integer to 128 bits with zeros
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>  ; integer -> wider vector
+  ret <4 x i32> %tmp10
+; CHECK: @test61
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test62(<3 x float> %call4) nounwind {  ; like test61, but the element types differ (float in, i32 out)
+  %tmp11 = bitcast <3 x float> %call4 to i96  ; vector -> integer
+  %tmp9 = zext i96 %tmp11 to i128  ; widen the integer to 128 bits with zeros
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>  ; integer -> wider vector
+  ret <4 x i32> %tmp10
+; CHECK: @test62
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+