[llvm] r181144 - LoopVectorize: Add support for floating point min/max reductions

Mon May 6 07:41:40 PDT 2013

Yes. The patch only handles programmer-written min/max functions that are expressed in terms of “select(cmp())”. The commit message might be somewhat misleading.

On May 5, 2013, at 9:30 PM, Owen Anderson <resistor at mac.com> wrote:

> IIRC, a "true" IEEE max/min operation can be considered a reduction even in non-fast-math mode.  If either operand is NaN, then it returns the other one.  Unfortunately, LLVM doesn't model these operations at the moment.
> 
> --Owen
> 
> On May 4, 2013, at 6:54 PM, Arnold Schwaighofer <aschwaighofer at apple.com> wrote:
> 
>> Author: arnolds
>> Date: Sat May  4 20:54:48 2013
>> New Revision: 181144
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=181144&view=rev
>> Log:
>> LoopVectorize: Add support for floating point min/max reductions
>> 
>> Add support for min/max reductions when the "no-nans-fp-math" function
>> attribute is set to "true". This allows us to assume we have ordered floating
>> point math and treat ordered and unordered predicates equally.
>> 
>> radar://13723044
>> 
>> Modified:
>>   llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>>   llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll
>> 
>> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=181144&r1=181143&r2=181144&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Sat May  4 20:54:48 2013
>> @@ -335,7 +335,7 @@ public:
>>                            DominatorTree *DT, TargetTransformInfo* TTI,
>>                            AliasAnalysis *AA, TargetLibraryInfo *TLI)
>>      : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
>> -        Induction(0) {}
>> +        Induction(0), HasFunNoNaNAttr(false) {}
>> 
>>  /// This enum represents the kinds of reductions that we support.
>>  enum ReductionKind {
>> @@ -347,7 +347,8 @@ public:
>>    RK_IntegerXor,  ///< Bitwise or logical XOR of numbers.
>>    RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
>>    RK_FloatAdd,    ///< Sum of floats.
>> -    RK_FloatMult    ///< Product of floats.
>> +    RK_FloatMult,   ///< Product of floats.
>> +    RK_FloatMinMax  ///< Min/max implemented in terms of select(cmp()).
>>  };
>> 
>>  /// This enum represents the kinds of inductions that we support.
>> @@ -365,7 +366,9 @@ public:
>>    MRK_UIntMin,
>>    MRK_UIntMax,
>>    MRK_SIntMin,
>> -    MRK_SIntMax
>> +    MRK_SIntMax,
>> +    MRK_FloatMin,
>> +    MRK_FloatMax
>>  };
>> 
>>  /// This POD struct holds information about reduction variables.
>> @@ -586,6 +589,8 @@ private:
>>  /// We need to check that all of the pointers in this list are disjoint
>>  /// at runtime.
>>  RuntimePointerCheck PtrRtCheck;
>> +  /// Can we assume the absence of NaNs.
>> +  bool HasFunNoNaNAttr;
>> };
>> 
>> /// LoopVectorizationCostModel - estimates the expected speedups due to
>> @@ -1648,6 +1653,8 @@ getReductionBinOp(LoopVectorizationLegal
>>      return Instruction::FAdd;
>>    case LoopVectorizationLegality::RK_IntegerMinMax:
>>      return Instruction::ICmp;
>> +    case LoopVectorizationLegality::RK_FloatMinMax:
>> +      return Instruction::FCmp;
>>    default:
>>      llvm_unreachable("Unknown reduction operation");
>>  }
>> @@ -1672,8 +1679,21 @@ Value *createMinMaxOp(IRBuilder<> &Build
>>    break;
>>  case LoopVectorizationLegality::MRK_SIntMax:
>>    P = CmpInst::ICMP_SGT;
>> +    break;
>> +  case LoopVectorizationLegality::MRK_FloatMin:
>> +    P = CmpInst::FCMP_OLT;
>> +    break;
>> +  case LoopVectorizationLegality::MRK_FloatMax:
>> +    P = CmpInst::FCMP_OGT;
>> +    break;
>>  }
>> -  Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
>> +
>> +  Value *Cmp;
>> +  if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
>> +    Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
>> +  else
>> +    Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
>> +
>>  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
>>  return Select;
>> }
>> @@ -1743,11 +1763,12 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
>>    // one for multiplication, -1 for And.
>>    Value *Identity;
>>    Value *VectorStart;
>> -    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax)
>> +    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
>> +        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
>>      // MinMax reduction have the start value as their identify.
>>      VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
>>                                                         "minmax.ident");
>> -    else {
>> +    } else {
>>      Constant *Iden =
>>        LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
>>                                                        VecTy->getScalarType());
>> @@ -1801,7 +1822,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
>>    Value *ReducedPartRdx = RdxParts[0];
>>    unsigned Op = getReductionBinOp(RdxDesc.Kind);
>>    for (unsigned part = 1; part < UF; ++part) {
>> -      if (Op != Instruction::ICmp)
>> +      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
>>        ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
>>                                             RdxParts[part], ReducedPartRdx,
>>                                             "bin.rdx");
>> @@ -1832,7 +1853,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
>>                                    ConstantVector::get(ShuffleMask),
>>                                    "rdx.shuf");
>> 
>> -      if (Op != Instruction::ICmp)
>> +      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
>>        TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
>>                                     "bin.rdx");
>>      else
>> @@ -2363,6 +2384,13 @@ bool LoopVectorizationLegality::canVecto
>>    return false;
>>  }
>> 
>> +  // Look for the attribute signaling the absence of NaNs.
>> +  Function &F = *Header->getParent();
>> +  if (F.hasFnAttribute("no-nans-fp-math"))
>> +    HasFunNoNaNAttr = F.getAttributes().getAttribute(
>> +      AttributeSet::FunctionIndex,
>> +      "no-nans-fp-math").getValueAsString() == "true";
>> +
>>  // For each block in the loop.
>>  for (Loop::block_iterator bb = TheLoop->block_begin(),
>>       be = TheLoop->block_end(); bb != be; ++bb) {
>> @@ -2444,6 +2472,10 @@ bool LoopVectorizationLegality::canVecto
>>          DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
>>          continue;
>>        }
>> +        if (AddReductionVar(Phi, RK_FloatMinMax)) {
>> +          DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
>> +          continue;
>> +        }
>> 
>>        DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
>>        return false;
>> @@ -2869,7 +2901,7 @@ bool LoopVectorizationLegality::AddReduc
>>  // such that we don't stop when we see the phi has two uses (one by the select
>>  // and one by the icmp) and to make sure we only see exactly the two
>>  // instructions.
>> -  unsigned NumICmpSelectPatternInst = 0;
>> +  unsigned NumCmpSelectPatternInst = 0;
>>  ReductionInstDesc ReduxDesc(false, 0);
>> 
>>  // Avoid cycles in the chain.
>> @@ -2918,7 +2950,7 @@ bool LoopVectorizationLegality::AddReduc
>> 
>>      // We can't have multiple inside users except for a combination of
>>      // icmp/select both using the phi.
>> -      if (FoundInBlockUser && !NumICmpSelectPatternInst)
>> +      if (FoundInBlockUser && !NumCmpSelectPatternInst)
>>        return false;
>>      FoundInBlockUser = true;
>> 
>> @@ -2927,14 +2959,15 @@ bool LoopVectorizationLegality::AddReduc
>>      if (!ReduxDesc.IsReduction)
>>        return false;
>> 
>> -      if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
>> -                                       isa<SelectInst>(U)))
>> -          ++NumICmpSelectPatternInst;
>> +      if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
>> +          ++NumCmpSelectPatternInst;
>> +      if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
>> +          ++NumCmpSelectPatternInst;
>> 
>>      // Reductions of instructions such as Div, and Sub is only
>>      // possible if the LHS is the reduction variable.
>>      if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
>> -          !isa<ICmpInst>(U) && U->getOperand(0) != Iter)
>> +          !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
>>        return false;
>> 
>>      Iter = ReduxDesc.PatternLastInst;
>> @@ -2942,7 +2975,8 @@ bool LoopVectorizationLegality::AddReduc
>> 
>>    // This means we have seen one but not the other instruction of the
>>    // pattern or more than just a select and cmp.
>> -    if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
>> +    if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
>> +        NumCmpSelectPatternInst != 2)
>>      return false;
>> 
>>    // We found a reduction var if we have reached the original
>> @@ -2968,16 +3002,17 @@ bool LoopVectorizationLegality::AddReduc
>> /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
>> /// pattern corresponding to a min(X, Y) or max(X, Y).
>> LoopVectorizationLegality::ReductionInstDesc
>> -LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) {
>> +LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
>> +                                                    ReductionInstDesc &Prev) {
>> 
>> -  assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
>> +  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
>>         "Expect a select instruction");
>> -  ICmpInst *Cmp = 0;
>> +  Instruction *Cmp = 0;
>>  SelectInst *Select = 0;
>> 
>>  // We must handle the select(cmp()) as a single instruction. Advance to the
>>  // select.
>> -  if ((Cmp = dyn_cast<ICmpInst>(I))) {
>> +  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
>>    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
>>      return ReductionInstDesc(false, I);
>>    return ReductionInstDesc(Select, Prev.MinMaxKind);
>> @@ -2986,7 +3021,8 @@ LoopVectorizationLegality::isMinMaxSelec
>>  // Only handle single use cases for now.
>>  if (!(Select = dyn_cast<SelectInst>(I)))
>>    return ReductionInstDesc(false, I);
>> -  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
>> +  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
>> +      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
>>    return ReductionInstDesc(false, I);
>>  if (!Cmp->hasOneUse())
>>    return ReductionInstDesc(false, I);
>> @@ -3003,6 +3039,14 @@ LoopVectorizationLegality::isMinMaxSelec
>>    return ReductionInstDesc(Select, MRK_SIntMax);
>>  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
>>    return ReductionInstDesc(Select, MRK_SIntMin);
>> +  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
>> +    return ReductionInstDesc(Select, MRK_FloatMin);
>> +  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
>> +    return ReductionInstDesc(Select, MRK_FloatMax);
>> +  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
>> +    return ReductionInstDesc(Select, MRK_FloatMin);
>> +  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
>> +    return ReductionInstDesc(Select, MRK_FloatMax);
>> 
>>  return ReductionInstDesc(false, I);
>> }
>> @@ -3017,7 +3061,8 @@ LoopVectorizationLegality::isReductionIn
>>  default:
>>    return ReductionInstDesc(false, I);
>>  case Instruction::PHI:
>> -      if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
>> +      if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
>> +                 Kind != RK_FloatMinMax))
>>        return ReductionInstDesc(false, I);
>>    return ReductionInstDesc(I, Prev.MinMaxKind);
>>  case Instruction::Sub:
>> @@ -3035,9 +3080,11 @@ LoopVectorizationLegality::isReductionIn
>>    return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
>>  case Instruction::FAdd:
>>    return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
>> +  case Instruction::FCmp:
>>  case Instruction::ICmp:
>>  case Instruction::Select:
>> -    if (Kind != RK_IntegerMinMax)
>> +    if (Kind != RK_IntegerMinMax &&
>> +        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
>>      return ReductionInstDesc(false, I);
>>    return isMinMaxSelectCmpPattern(I, Prev);
>>  }
>> 
>> Modified: llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll?rev=181144&r1=181143&r2=181144&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/minmax_reduction.ll Sat May  4 20:54:48 2013
>> @@ -3,6 +3,8 @@
>> target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> 
>> @A = common global [1024 x i32] zeroinitializer, align 16
>> + at fA = common global [1024 x float] zeroinitializer, align 16
>> + at dA = common global [1024 x double] zeroinitializer, align 16
>> 
>> ; Signed tests.
>> 
>> @@ -403,3 +405,481 @@ for.body:
>> for.end:
>>  ret i32 %max.red.0
>> }
>> +
>> +; Float tests.
>> +
>> +; Maximum.
>> +
>> +; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
>> +; CHECK: @max_red_float
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @max_red_float(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ogt float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @max_red_float_ge
>> +; CHECK: fcmp oge <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @max_red_float_ge(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp oge float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @inverted_max_red_float
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_max_red_float(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp olt float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @inverted_max_red_float_le
>> +; CHECK: fcmp ole <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_max_red_float_le(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ole float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @unordered_max_red
>> +; CHECK: fcmp ugt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @unordered_max_red_float(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ugt float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @unordered_max_red_float_ge
>> +; CHECK: fcmp uge <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @unordered_max_red_float_ge(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp uge float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @inverted_unordered_max_red
>> +; CHECK: fcmp ult <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_unordered_max_red_float(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ult float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; CHECK: @inverted_unordered_max_red_float_le
>> +; CHECK: fcmp ule <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_unordered_max_red_float_le(float %max) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ule float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +; Minimum.
>> +
>> +; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
>> +; CHECK: @min_red_float
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @min_red_float(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp olt float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @min_red_float_le
>> +; CHECK: fcmp ole <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @min_red_float_le(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ole float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @inverted_min_red_float
>> +; CHECK: fcmp ogt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_min_red_float(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ogt float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @inverted_min_red_float_ge
>> +; CHECK: fcmp oge <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_min_red_float_ge(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp oge float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @unordered_min_red
>> +; CHECK: fcmp ult <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @unordered_min_red_float(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ult float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @unordered_min_red_float_le
>> +; CHECK: fcmp ule <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @unordered_min_red_float_le(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ule float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @inverted_unordered_min_red
>> +; CHECK: fcmp ugt <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_unordered_min_red_float(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ugt float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; CHECK: @inverted_unordered_min_red_float_ge
>> +; CHECK: fcmp uge <2 x float>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x float>
>> +; CHECK: select <2 x i1>
>> +
>> +define float @inverted_unordered_min_red_float_ge(float %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp uge float %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %min.red.0
>> +}
>> +
>> +; Make sure we handle doubles, too.
>> +; CHECK: @min_red_double
>> +; CHECK: fcmp olt <2 x double>
>> +; CHECK: select <2 x i1>
>> +; CHECK: middle.block
>> +; CHECK: fcmp olt <2 x double>
>> +; CHECK: select <2 x i1>
>> +
>> +define double @min_red_double(double %min) #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x double]* @dA, i64 0, i64 %indvars.iv
>> +  %0 = load double* %arrayidx, align 4
>> +  %cmp3 = fcmp olt double %0, %min.red.08
>> +  %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret double %min.red.0
>> +}
>> +
>> +
>> +; Don't turn this into a max reduction. The no-nans-fp-math attribute is missing.
>> +; CHECK: @max_red_float_nans
>> +; CHECK-NOT: <2 x float>
>> +
>> +define float @max_red_float_nans(float %max) {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
>> +  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4
>> +  %cmp3 = fcmp ogt float %0, %max.red.08
>> +  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
>> +  %indvars.iv.next = add i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> +  br i1 %exitcond, label %for.end, label %for.body
>> +
>> +for.end:
>> +  ret float %max.red.0
>> +}
>> +
>> +
>> +attributes #0 = { "no-nans-fp-math"="true" } 
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits