[llvm-commits] [dragonegg] r131388 - in /dragonegg/trunk: include/dragonegg/Internals.h src/Convert.cpp

Sun May 15 12:59:30 PDT 2011

Author: baldrick
Date: Sun May 15 14:59:30 2011
New Revision: 131388

URL: http://llvm.org/viewvc/llvm-project?rev=131388&view=rev
Log:
Implement REDUC_MAX_EXPR and REDUC_MIN_EXPR differently, always
operating on vectors of the same type as the input rather than halving
the vector length at each step.

Modified:
    dragonegg/trunk/include/dragonegg/Internals.h
    dragonegg/trunk/src/Convert.cpp

Modified: dragonegg/trunk/include/dragonegg/Internals.h
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/include/dragonegg/Internals.h?rev=131388&r1=131387&r2=131388&view=diff
==============================================================================

--- dragonegg/trunk/include/dragonegg/Internals.h (original)
+++ dragonegg/trunk/include/dragonegg/Internals.h Sun May 15 14:59:30 2011
@@ -886,10 +886,6 @@
   /// elements of the given vector with indices in the bottom half.
   Value *VectorLowElements(Value *Vec);
 
-  /// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
-  /// of the two pieces; repeat recursively on the result until scalar.
-  Value *ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred);
-
 private:
   // Optional target defined builtin intrinsic expanding function.
   bool TargetIntrinsicLower(gimple_statement_d *stmt,

Modified: dragonegg/trunk/src/Convert.cpp
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/Convert.cpp?rev=131388&r1=131387&r2=131388&view=diff
==============================================================================
--- dragonegg/trunk/src/Convert.cpp (original)
+++ dragonegg/trunk/src/Convert.cpp Sun May 15 14:59:30 2011
@@ -6480,7 +6480,7 @@
   Value *Compare;
   if (FLOAT_TYPE_P(TREE_TYPE(op0)))
     Compare = Builder.CreateFCmp(FCmpInst::Predicate(FPPred), LHS, RHS);
-  else if (TYPE_UNSIGNED(TREE_TYPE(op1)))
+  else if (TYPE_UNSIGNED(TREE_TYPE(op0)))
     Compare = Builder.CreateICmp(ICmpInst::Predicate(UIPred), LHS, RHS);
   else
     Compare = Builder.CreateICmp(ICmpInst::Predicate(SIPred), LHS, RHS);
@@ -6488,56 +6488,53 @@
   return Builder.CreateSelect(Compare, LHS, RHS);
 }
 
-/// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
-/// of the two pieces; repeat recursively on the result until scalar.
-Value *TreeToLLVM::ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred) {
-  const VectorType *Ty = cast<VectorType>(Op->getType());
-  unsigned NumElts = Ty->getNumElements();
-  assert(NumElts > 1 && !(NumElts & (NumElts - 1)) &&
-         "Number of vector elements is not a power of 2!");
-
-  if (NumElts == 2) {
-    // Extract each of the two elements and return the max/min of them.
-    const Type *Int32Ty = Type::getInt32Ty(Context);
-    Value *LHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 0));
-    Value *RHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 1));
-    Value *Compare = CmpInst::isFPPredicate(Pred) ?
-      Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
-    return Builder.CreateSelect(Compare, LHS, RHS);
-  }
-
-  // Recursively extract the high and low halves of the vector and take the
-  // max/min of them.  Using vector select like this results in better code
-  // if the target supports it for at least one of the resulting vector sizes.
-  Value *LHS = VectorLowElements(Op), *RHS = VectorHighElements(Op);
-  Value *Compare = CmpInst::isFPPredicate(Pred) ?
-    Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
-  Op = Builder.CreateSelect(Compare, LHS, RHS);
-  return ReducMinMaxExprHelper(Op, Pred);
-}
-
 Value *TreeToLLVM::EmitReg_ReducMinMaxExpr(tree op, unsigned UIPred,
                                            unsigned SIPred, unsigned FPPred) {
-  // Use a divide and conquer scheme that results in the same code as the simple
-  // scalar implementation on targets that don't support vector select but gives
-  // better code if vector select is present.
-  // For example, reduc-max <float x0, float x1, float x2, float x3> becomes
-  //   v = max <float x0, float x1>, <float x2, float x3>   <- vector select
-  //   m = max float v0, float v1   <- scalar select; v = <float v0, float v1>
-  // The final result, m, equals max(max(x0,x2),max(x1,x3)) = max(x0,x1,x2,x3).
+  // In the bottom half of the vector, form the max/min of the bottom and top
+  // halves of the vector.  Rinse and repeat on the just computed bottom half:
+  // in the bottom quarter of the vector, form the max/min of the bottom and
+  // top halves of the bottom half.  Continue until only the first element of
+  // the vector is computed.  For example, reduc-max <x0, x1, x2, x3> becomes
+  //   v = max <x0, x1, undef, undef>, <x2, x3, undef, undef>
+  //   w = max <v0, undef, undef, undef>, <v1, undef, undef, undef>
+  // where v = <v0, v1, undef, undef>.  The first element of w is the max/min
+  // of x0,x1,x2,x3.
   Value *Val = EmitRegister(op);
-  Value *Res;
-  if (FLOAT_TYPE_P(TREE_TYPE(op)))
-    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(FPPred));
-  else if (TYPE_UNSIGNED(TREE_TYPE(op)))
-    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(UIPred));
-  else
-    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(SIPred));
+  const Type *Ty = Val->getType();
+
+  CmpInst::Predicate Pred =
+    CmpInst::Predicate(FLOAT_TYPE_P(TREE_TYPE(op)) ?
+                       FPPred : TYPE_UNSIGNED(TREE_TYPE(op)) ? UIPred : SIPred);
+
+  unsigned Length = TYPE_VECTOR_SUBPARTS(TREE_TYPE(op));
+  assert(Length > 1 && !(Length & (Length - 1)) && "Length not a power of 2!");
+  SmallVector<Constant*, 8> Mask(Length);
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+  Constant *UndefIndex = UndefValue::get(Int32Ty);
+  for (unsigned Elts = Length >> 1; Elts; Elts >>= 1) {
+    // In the extracted vectors, elements with index Elts and on are undefined.
+    for (unsigned i = Elts; i != Length; ++i)
+      Mask[i] = UndefIndex;
+    // Extract elements [0, Elts) from Val.
+    for (unsigned i = 0; i != Elts; ++i)
+      Mask[i] = ConstantInt::get(Int32Ty, i);
+    Value *LHS = Builder.CreateShuffleVector(Val, UndefValue::get(Ty),
+                                             ConstantVector::get(Mask));
+    // Extract elements [Elts, 2*Elts) from Val.
+    for (unsigned i = 0; i != Elts; ++i)
+      Mask[i] = ConstantInt::get(Int32Ty, Elts + i);
+    Value *RHS = Builder.CreateShuffleVector(Val, UndefValue::get(Ty),
+                                             ConstantVector::get(Mask));
+
+    // Replace Val with the max/min of the extracted elements.
+    Value *Compare = FLOAT_TYPE_P(TREE_TYPE(op)) ?
+      Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
+    Val = Builder.CreateSelect(Compare, LHS, RHS);
+
+    // Repeat, using half as many elements.
+  }
 
-  // The result is vector with the max/min as first element.
-  return Builder.CreateInsertElement(UndefValue::get(Val->getType()), Res,
-                                     ConstantInt::get(Type::getInt32Ty(Context),
-                                                      0));
+  return Val;
 }
 
 Value *TreeToLLVM::EmitReg_RotateOp(tree type, tree op0, tree op1,