[llvm-commits] [dragonegg] r131293 - in /dragonegg/trunk: include/dragonegg/Internals.h src/Convert.cpp

Fri May 13 08:04:04 PDT 2011

Author: baldrick
Date: Fri May 13 10:04:04 2011
New Revision: 131293

URL: http://llvm.org/viewvc/llvm-project?rev=131293&view=rev
Log:
Add support for REDUC_MAX_EXPR and REDUC_MIN_EXPR which may be formed
by the GCC tree vectorizer.

Modified:
    dragonegg/trunk/include/dragonegg/Internals.h
    dragonegg/trunk/src/Convert.cpp

Modified: dragonegg/trunk/include/dragonegg/Internals.h
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/include/dragonegg/Internals.h?rev=131293&r1=131292&r2=131293&view=diff
==============================================================================

--- dragonegg/trunk/include/dragonegg/Internals.h (original)
+++ dragonegg/trunk/include/dragonegg/Internals.h Fri May 13 10:04:04 2011
@@ -693,6 +693,8 @@
   // Binary expressions.
   Value *EmitReg_MinMaxExpr(tree_node *op0, tree_node *op1, unsigned UIPred,
                             unsigned SIPred, unsigned Opc);
+  Value *EmitReg_ReducMinMaxExpr(tree_node *op, unsigned UIPred,
+                                 unsigned SIPred, unsigned FPPred);
   Value *EmitReg_RotateOp(tree_node *type, tree_node *op0, tree_node *op1,
                           unsigned Opc1, unsigned Opc2);
   Value *EmitReg_ShiftOp(tree_node *op0, tree_node *op1, unsigned Opc);
@@ -876,6 +878,18 @@
   void StoreRegisterToMemory(Value *V, MemRef Loc, tree_node *type,
                              LLVMBuilder &Builder);
 
+  /// VectorHighElements - Return a vector of half the length, consisting of the
+  /// elements of the given vector with indices in the top half.
+  Value *VectorHighElements(Value *Vec);
+
+  /// VectorLowElements - Return a vector of half the length, consisting of the
+  /// elements of the given vector with indices in the bottom half.
+  Value *VectorLowElements(Value *Vec);
+
+  /// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
+  /// of the two pieces; repeat recursively on the result until scalar.
+  Value *ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred);
+
 private:
   // Optional target defined builtin intrinsic expanding function.
   bool TargetIntrinsicLower(gimple_statement_d *stmt,

Modified: dragonegg/trunk/src/Convert.cpp
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/Convert.cpp?rev=131293&r1=131292&r2=131293&view=diff
==============================================================================
--- dragonegg/trunk/src/Convert.cpp (original)
+++ dragonegg/trunk/src/Convert.cpp Fri May 13 10:04:04 2011
@@ -6200,6 +6200,36 @@
   SI->setAlignment(Loc.getAlignment());
 }
 
+/// VectorHighElements - Return a vector of half the length, consisting of the
+/// elements of the given vector with indices in the top half.
+Value *TreeToLLVM::VectorHighElements(Value *Vec) {
+  const VectorType *Ty = cast<VectorType>(Vec->getType());
+  assert(!(Ty->getNumElements() & 1) && "Vector has odd number of elements!");
+  unsigned NumElts = Ty->getNumElements() / 2;
+  SmallVector<Constant*, 8> Mask;
+  Mask.reserve(NumElts);
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(ConstantInt::get(Int32Ty, NumElts + i));
+  return Builder.CreateShuffleVector(Vec, UndefValue::get(Ty),
+                                     ConstantVector::get(Mask));
+}
+
+/// VectorLowElements - Return a vector of half the length, consisting of the
+/// elements of the given vector with indices in the bottom half.
+Value *TreeToLLVM::VectorLowElements(Value *Vec) {
+  const VectorType *Ty = cast<VectorType>(Vec->getType());
+  assert(!(Ty->getNumElements() & 1) && "Vector has odd number of elements!");
+  unsigned NumElts = Ty->getNumElements() / 2;
+  SmallVector<Constant*, 8> Mask;
+  Mask.reserve(NumElts);
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(ConstantInt::get(Int32Ty, i));
+  return Builder.CreateShuffleVector(Vec, UndefValue::get(Ty),
+                                     ConstantVector::get(Mask));
+}
+
 
 //===----------------------------------------------------------------------===//
 //           ... EmitReg* - Convert register expression to LLVM...
@@ -6458,6 +6488,58 @@
   return Builder.CreateSelect(Compare, LHS, RHS);
 }
 
+/// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
+/// of the two pieces; repeat recursively on the result until scalar.
+Value *TreeToLLVM::ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred) {
+  const VectorType *Ty = cast<VectorType>(Op->getType());
+  unsigned NumElts = Ty->getNumElements();
+  assert(NumElts > 1 && !(NumElts & (NumElts - 1)) &&
+         "Number of vector elements is not a power of 2!");
+
+  if (NumElts == 2) {
+    // Extract each of the two elements and return the max/min of them.
+    const Type *Int32Ty = Type::getInt32Ty(Context);
+    Value *LHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 0));
+    Value *RHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 1));
+    Value *Compare = CmpInst::isFPPredicate(Pred) ?
+      Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
+    return Builder.CreateSelect(Compare, LHS, RHS);
+  }
+
+  // Recursively extract the high and low halves of the vector and take the
+  // max/min of them.  Using vector select like this results in better code
+  // if the target supports it for at least one of the resulting vector sizes.
+  Value *LHS = VectorLowElements(Op), *RHS = VectorHighElements(Op);
+  Value *Compare = CmpInst::isFPPredicate(Pred) ?
+    Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
+  Op = Builder.CreateSelect(Compare, LHS, RHS);
+  return ReducMinMaxExprHelper(Op, Pred);
+}
+
+Value *TreeToLLVM::EmitReg_ReducMinMaxExpr(tree op, unsigned UIPred,
+                                           unsigned SIPred, unsigned FPPred) {
+  // Use a divide and conquer scheme that results in the same code as the simple
+  // scalar implementation on targets that don't support vector select but gives
+  // better code if vector select is present.
+  // For example, reduc-max <float x0, float x1, float x2, float x3> becomes
+  //   v = max <float x0, float x1>, <float x2, float x3>   <- vector select
+  //   m = max float v0, float v1   <- scalar select; v = <float v0, float v1>
+  // The final result, m, equals max(max(x0,x2),max(x1,x3)) = max(x0,x1,x2,x3).
+  Value *Val = EmitRegister(op);
+  Value *Res;
+  if (FLOAT_TYPE_P(TREE_TYPE(op)))
+    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(FPPred));
+  else if (TYPE_UNSIGNED(TREE_TYPE(op)))
+    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(UIPred));
+  else
+    Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(SIPred));
+
+  // The result is a vector with the max/min as its first element.
+  return Builder.CreateInsertElement(UndefValue::get(Val->getType()), Res,
+                                     ConstantInt::get(Type::getInt32Ty(Context),
+                                                      0));
+}
+
 Value *TreeToLLVM::EmitReg_RotateOp(tree type, tree op0, tree op1,
                                     unsigned Opc1, unsigned Opc2) {
   Value *In  = EmitRegister(op0);
@@ -7011,13 +7093,7 @@
   Value *Op = EmitRegister(op0);
 
   // Extract the high elements, eg: <4 x float> -> <2 x float>.
-  unsigned Length = TYPE_VECTOR_SUBPARTS(type);
-  SmallVector<Constant*, 16> Mask;
-  Mask.reserve(Length);
-  for (unsigned i = 0; i != Length; ++i)
-    Mask.push_back(ConstantInt::get(Type::getInt32Ty(Context), Length + i));
-  Op = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
-                                   ConstantVector::get(Mask));
+  Op = VectorHighElements(Op);
 
   // Extend the input elements to the output element type, eg: <2 x float>
   // -> <2 x double>.
@@ -7034,13 +7110,7 @@
   Value *Op = EmitRegister(op0);
 
   // Extract the low elements, eg: <4 x float> -> <2 x float>.
-  unsigned Length = TYPE_VECTOR_SUBPARTS(type);
-  SmallVector<Constant*, 16> Mask;
-  Mask.reserve(Length);
-  for (unsigned i = 0; i != Length; ++i)
-    Mask.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
-  Op = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
-                                   ConstantVector::get(Mask));
+  Op = VectorLowElements(Op);
 
   // Extend the input elements to the output element type, eg: <2 x float>
   // -> <2 x double>.
@@ -7989,6 +8059,14 @@
     RHS = EmitReg_POINTER_PLUS_EXPR(rhs1, rhs2); break;
   case RDIV_EXPR:
     RHS = EmitReg_RDIV_EXPR(rhs1, rhs2); break;
+  case REDUC_MAX_EXPR:
+    RHS = EmitReg_ReducMinMaxExpr(rhs1, ICmpInst::ICMP_UGE, ICmpInst::ICMP_SGE,
+                                  FCmpInst::FCMP_OGE);
+    break;
+  case REDUC_MIN_EXPR:
+    RHS = EmitReg_ReducMinMaxExpr(rhs1, ICmpInst::ICMP_ULE, ICmpInst::ICMP_SLE,
+                                  FCmpInst::FCMP_OLE);
+    break;
   case ROUND_DIV_EXPR:
     RHS = EmitReg_ROUND_DIV_EXPR(rhs1, rhs2); break;
   case RROTATE_EXPR: