[llvm] r353136 - [LSR] Check SCEV on isZero() after extend. PR40514

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 4 20:30:37 PST 2019


Author: mkazantsev
Date: Mon Feb  4 20:30:37 2019
New Revision: 353136

URL: http://llvm.org/viewvc/llvm-project?rev=353136&view=rev
Log:
[LSR] Check SCEV on isZero() after extend. PR40514

When LSR first adds SCEVs to BaseRegs, it only does so if `isZero()` has
returned false. At the end, in the invocation of `InsertFormula`, it asserts
that all values there are still not zero constants. However, between these two
points, it makes some transformations; in particular, it extends them to a
wider type.

SCEV does not give us a guarantee that if `S` is not a constant zero, then
`sext(S)` is also not a constant zero. It might have missed some optimizing
transforms when it was calculating `S` and then made them when it took `sext`.
For example, this may happen if the optimizing transforms were previously
limited by depth or in some other way.

This patch adds a bailout when we may end up with a zero SCEV after extension.

Differential Revision: https://reviews.llvm.org/D57565
Reviewed By: samparker

Added:
    llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=353136&r1=353135&r2=353136&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Feb  4 20:30:37 2019
@@ -3967,9 +3967,27 @@ void LSRInstance::GenerateTruncates(LSRU
     if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
       Formula F = Base;
 
-      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
-      for (const SCEV *&BaseReg : F.BaseRegs)
-        BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+      // Sometimes SCEV is able to prove zero during ext transform. It may
+      // happen if SCEV did not do all possible transforms while creating the
+      // initial node (maybe due to depth limitations), but it can do them while
+      // taking ext.
+      if (F.ScaledReg) {
+        const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+        if (NewScaledReg->isZero())
+         continue;
+        F.ScaledReg = NewScaledReg;
+      }
+      bool HasZeroBaseReg = false;
+      for (const SCEV *&BaseReg : F.BaseRegs) {
+        const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+        if (NewBaseReg->isZero()) {
+          HasZeroBaseReg = true;
+          break;
+        }
+        BaseReg = NewBaseReg;
+      }
+      if (HasZeroBaseReg)
+        continue;
 
       // TODO: This assumes we've done basic processing on all uses and
       // have an idea what the register usage is.

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll?rev=353136&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll Mon Feb  4 20:30:37 2019
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @pluto(i32 %arg) #0 {
+; CHECK-LABEL: @pluto(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB10:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i64 [[LSR_IV_NEXT2:%.*]], i64 addrspace(1)* undef, align 8
+; CHECK-NEXT:    ret i32 [[LSR_IV_NEXT:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2]], [[BB10]] ], [ 9, [[BB:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT]], [[BB10]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], 1
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB10]]
+;
+
+bb:
+  br label %bb10
+
+bb1:                                              ; preds = %bb10
+  %tmp = and i64 %tmp24, 4294967295
+  %tmp2 = shl i64 %tmp23, 33
+  %tmp3 = ashr exact i64 %tmp2, 32
+  %tmp4 = add i64 undef, %tmp
+  %tmp5 = add i64 %tmp4, %tmp3
+  %tmp6 = add i64 %tmp5, undef
+  %tmp7 = add i64 %tmp6, undef
+  %tmp8 = add i64 undef, %tmp7
+  store i64 %tmp8, i64 addrspace(1)* undef, align 8
+  %tmp9 = trunc i64 %tmp7 to i32
+  ret i32 %tmp9
+
+bb10:                                             ; preds = %bb10, %bb
+  %tmp11 = phi i64 [ 9, %bb ], [ %tmp24, %bb10 ]
+  %tmp12 = shl i64 undef, 1
+  %tmp13 = mul i64 %tmp12, %tmp12
+  %tmp14 = shl i64 %tmp13, 1
+  %tmp15 = mul i64 %tmp14, %tmp14
+  %tmp16 = shl i64 %tmp15, 1
+  %tmp17 = mul i64 %tmp16, %tmp16
+  %tmp18 = shl i64 %tmp17, 1
+  %tmp19 = mul i64 %tmp18, %tmp18
+  %tmp20 = shl i64 %tmp19, 1
+  %tmp21 = mul i64 %tmp20, %tmp20
+  %tmp22 = shl i64 %tmp21, 1
+  %tmp23 = mul i64 %tmp22, %tmp22
+  %tmp24 = add nuw nsw i64 %tmp11, 1
+  br i1 undef, label %bb1, label %bb10
+}
+
+
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }




More information about the llvm-commits mailing list