[Bug 16405] New: Missed scalar optimization

           Summary: Missed scalar optimization
# cat 1.cc
double *f(double *p, int n) {
  for (int i = 0; i < n; ++i)
    ++p;
  return p;
}

# clang++ -S -emit-llvm -O2 1.cc -o -

define double* @_Z1fPdi(double* %p, i32 %n) #0 {
  %cmp2 = icmp sgt i32 %n, 0
  br i1 %cmp2, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  %0 = add i32 %n, -1
  %1 = zext i32 %0 to i64
  %2 = add i64 %1, 1
  %scevgep = getelementptr double* %p, i64 %2
  br label %for.end

for.end:                                          ; preds = %for.body.lr.ph, %entry
  %p.addr.0.lcssa = phi double* [ %scevgep, %for.body.lr.ph ], [ %p, %entry ]
  ret double* %p.addr.0.lcssa

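The for.body.lr.ph basic block is reached only when %cmp2 is true, so %n > 0
there. That means %n - 1 cannot wrap, and the first three instructions in the
block (add -1, zext, add 1) can be replaced with a single zext (or sext) of %n.
This would also result in simpler x86 asm. The current code compiles to: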

        testl   %esi, %esi
        jle     .LBB0_2
# BB#1:                                 # %for.body.lr.ph
        decl    %esi
        leaq    8(%rdi,%rsi,8), %rdi
.LBB0_2:                                # %for.end
        movq    %rdi, %rax

