[llvm] r216160 - [INDVARS] Extend using of widening of induction variables for the cases of "sub nsw" and "mul nsw" instructions.

Thu Aug 21 04:03:47 PDT 2014

----- Original Message -----
> From: "Zinovy Nis" <zinovy.nis at gmail.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Thursday, August 21, 2014 3:25:45 AM
> Subject: [llvm] r216160 - [INDVARS] Extend using of widening of induction	variables for the cases of "sub nsw" and
> "mul nsw" instructions.
> 
> Author: zinovy.nis
> Date: Thu Aug 21 03:25:45 2014
> New Revision: 216160
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=216160&view=rev
> Log:
> [INDVARS] Extend using of widening of induction variables for the
> cases of "sub nsw" and "mul nsw" instructions.
> 
> Currently only "add nsw" instructions are widened. This patch
> eliminates tons of "sext" instructions for 64-bit code (and the
> corresponding target code) in cases like:
> 
> int N = 100;
> float **A;
> 
> void foo(int x0, int x1)
> {
>         float * A_cur = &A[0][0];
>         float * A_next = &A[1][0];
>         for(int x = x0; x < x1; ++x)
>         {
>           // Currently only the [x+N] case is widened. The other 2
>           // cases lead to sext.
>           // This patch fixes it, so all 3 cases do not need sext.
>           const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x *
>           N];
>           A_next[x] = div;
>         }
> }
> ...
> > clang++ test.cpp -march=core-avx2 -Ofast  -fno-unroll-loops
> > -fno-tree-vectorize -S -o -
> 
> Differential Revision: http://reviews.llvm.org/D4695
> 
> 
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
>     llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
> 
> Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=216160&r1=216159&r2=216160&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Thu Aug 21
> 03:25:45 2014
> @@ -757,6 +757,9 @@ protected:
>  
>    const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse
>    DU);
>  
> +  const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
> +                              unsigned OpCode) const;
> +
>    Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander
>    &Rewriter);
>  
>    void pushNarrowIVUsers(Instruction *NarrowDef, Instruction
>    *WideDef);
> @@ -833,13 +836,30 @@ Instruction *WidenIV::CloneIVUser(Narrow
>    }
>  }
>  
> +const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV
> *RHS,
> +                                     unsigned OpCode) const {
> +  if (OpCode == Instruction::Add)
> +    return SE->getAddExpr(LHS, RHS);
> +  if (OpCode == Instruction::Sub)
> +    return SE->getMinusSCEV(LHS, RHS);
> +  if (OpCode == Instruction::Mul)
> +    return SE->getMulExpr(LHS, RHS);
> +
> +  llvm_unreachable("Unsupported opcode.");
> +  return nullptr;

You should remove the 'return nullptr' here: llvm_unreachable does not return, so the statement is dead code (I'm actually surprised we don't get a warning about it).

 -Hal

> +}
> +
>  /// No-wrap operations can transfer sign extension of their result
>  to their
>  /// operands. Generate the SCEV value for the widened operation
>  without
>  /// actually modifying the IR yet. If the expression after extending
>  the
>  /// operands is an AddRec for this loop, return it.
>  const SCEVAddRecExpr*
>  WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
> +
>    // Handle the common case of add<nsw/nuw>
> -  if (DU.NarrowUse->getOpcode() != Instruction::Add)
> +  const unsigned OpCode = DU.NarrowUse->getOpcode();
> +  // Only Add/Sub/Mul instructions supported yet.
> +  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
> +      OpCode != Instruction::Mul)
>      return nullptr;
>  
>    // One operand (NarrowDef) has already been extended to WideDef.
>    Now determine
> @@ -859,14 +879,13 @@ const SCEVAddRecExpr* WidenIV::GetExtend
>    else
>      return nullptr;
>  
> -  // When creating this AddExpr, don't apply the current operations
> NSW or NUW
> +  // When creating this SCEV expr, don't apply the current
> operations NSW or NUW
>    // flags. This instruction may be guarded by control flow that the
>    no-wrap
>    // behavior depends on. Non-control-equivalent instructions can be
>    mapped to
>    // the same SCEV expression, and it would be incorrect to transfer
>    NSW/NUW
>    // semantics to those operations.
>    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
> -    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
> -
> +      GetSCEVByOpCode(SE->getSCEV(DU.WideDef), ExtendOperExpr,
> OpCode));
>    if (!AddRec || AddRec->getLoop() != L)
>      return nullptr;
>    return AddRec;
> 
> Modified:
> llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll?rev=216160&r1=216159&r2=216160&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
> (original)
> +++ llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
> Thu Aug 21 03:25:45 2014
> @@ -1,6 +1,6 @@
>  ; RUN: opt < %s -indvars -S | FileCheck %s
>  ; Test WidenIV::GetExtendedOperandRecurrence.
> -; add219 should be extended to i64 because it is nsw, even though
> its
> +; %add, %sub and %mul should be extended to i64 because it is nsw,
> even though its
>  ; sext cannot be hoisted outside the loop.
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
> @@ -18,13 +18,22 @@ for.body153:
>    br i1 undef, label %for.body170, label %for.body153
>  
>  ; CHECK: add nsw i64 %indvars.iv, 1
> +; CHECK: sub nsw i64 %indvars.iv, 2
> +; CHECK: mul nsw i64 %indvars.iv, 4
>  for.body170:                                      ; preds =
>  %for.body170, %for.body153
>    %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
> -  %add219 = add nsw i32 %i2.19, 1
> -  %idxprom220 = sext i32 %add219 to i64
> +
> +  %add = add nsw i32 %i2.19, 1
> +  %add.idxprom = sext i32 %add to i64
> +
> +  %sub = sub nsw i32 %i2.19, 2
> +  %sub.idxprom = sext i32 %sub to i64
> +
> +  %mul = mul nsw i32 %i2.19, 4
> +  %mul.idxprom = sext i32 %mul to i64
> +
>    %add249 = add nsw i32 %i2.19, %shl132
>    br label %for.body170
> -
>  for.end285:                                       ; preds = %entry
>    ret void
>  }
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory