[PATCH] [INDVARS]

Zinovy Nis zinovy.nis at gmail.com
Mon Jul 28 05:42:29 PDT 2014


Hi atrick, rafael,

This patch extends using of widening of induction variables for the cases of "sub nsw" and "mul nsw" instructions. Currently only "add nsw" are widened. 
This patch eliminates tons of "sext" instructions for 64 bit code (and the corresponding target code) in cases like:


```
int N = 100;
float **A;

void foo(int x0, int x1)
{
        float * A_cur = &A[0][0];
        float * A_next = &A[1][0];
        for(int x = x0; x < x1; ++x).
        {
          // Currently only [x+N] case is widened. Others 2 cases lead to sext.
          // This patch fixes it, so all 3 cases do not need sext.
          const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x * N];
          A_next[x] = div;
        }
}
...
> clang++ test.cpp -march=core-avx2 -Ofast -S -o -
```

(with my patch)


```
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%rdi,%rcx,4), %xmm0
        vaddss  (%rdx,%rcx,4), %xmm0, %xmm0
        vaddss  (%rax), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addq    %r9, %rax
        cmpl    %esi, %ecx
        jl      .LBB0_2

```

vs trunk:


```
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%r10,%rcx,4), %xmm0
        leal    (%r11,%rcx), %edx
        movslq  %edx, %rdx
        vaddss  (%rax,%rdx,4), %xmm0, %xmm0
        movslq  %edi, %rdi
        vaddss  (%rax,%rdi,4), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addl    %r9d, %edi
        cmpl    %esi, %ecx
        jl      .LBB0_2

```

http://reviews.llvm.org/D4695

Files:
  lib/Transforms/Scalar/IndVarSimplify.cpp
  test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll

Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -757,6 +757,9 @@
 
   const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
 
+  const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+                              unsigned OpCode) const;
+
   Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
 
   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
@@ -833,13 +836,28 @@
   }
 }
 
+const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+                                     unsigned OpCode) const {
+  if (OpCode == Instruction::Add)
+    return SE->getAddExpr(LHS, RHS);
+  if (OpCode == Instruction::Sub)
+    return SE->getMinusSCEV(LHS, RHS);
+  if (OpCode == Instruction::Mul)
+    return SE->getMulExpr(LHS, RHS);
+  return nullptr;
+}
+
 /// No-wrap operations can transfer sign extension of their result to their
 /// operands. Generate the SCEV value for the widened operation without
 /// actually modifying the IR yet. If the expression after extending the
 /// operands is an AddRec for this loop, return it.
 const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+
   // Handle the common case of add<nsw/nuw>
-  if (DU.NarrowUse->getOpcode() != Instruction::Add)
+  const unsigned OpCode = DU.NarrowUse->getOpcode();
+  // Only Add/Sub/Mul instructions supported yet.
+  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+      OpCode != Instruction::Mul)
     return nullptr;
 
   // One operand (NarrowDef) has already been extended to WideDef. Now determine
@@ -859,14 +877,13 @@
   else
     return nullptr;
 
-  // When creating this AddExpr, don't apply the current operations NSW or NUW
+  // When creating this SCEV expr, don't apply the current operations NSW or NUW
   // flags. This instruction may be guarded by control flow that the no-wrap
   // behavior depends on. Non-control-equivalent instructions can be mapped to
   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
   // semantics to those operations.
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
-    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
-
+      GetSCEVByOpCode(SE->getSCEV(DU.WideDef), ExtendOperExpr, OpCode));
   if (!AddRec || AddRec->getLoop() != L)
     return nullptr;
   return AddRec;
Index: test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
===================================================================
--- test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
-; add219 should be extended to i64 because it is nsw, even though its
+; %add, %sub and %mul should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -18,13 +18,22 @@
   br i1 undef, label %for.body170, label %for.body153
 
 ; CHECK: add nsw i64 %indvars.iv, 1
+; CHECK: sub nsw i64 %indvars.iv, 2
+; CHECK: mul nsw i64 %indvars.iv, 4
 for.body170:                                      ; preds = %for.body170, %for.body153
   %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
-  %add219 = add nsw i32 %i2.19, 1
-  %idxprom220 = sext i32 %add219 to i64
+
+  %add = add nsw i32 %i2.19, 1
+  %add.idxprom = sext i32 %add to i64
+
+  %sub = sub nsw i32 %i2.19, 2
+  %sub.idxprom = sext i32 %sub to i64
+
+  %mul = mul nsw i32 %i2.19, 4
+  %mul.idxprom = sext i32 %mul to i64
+
   %add249 = add nsw i32 %i2.19, %shl132
   br label %for.body170
-
 for.end285:                                       ; preds = %entry
   ret void
 }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4695.11943.patch
Type: text/x-patch
Size: 4035 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140728/8c787dcb/attachment.bin>


More information about the llvm-commits mailing list