[llvm-commits] [llvm] r114506 - in /llvm/trunk: lib/Target/ARM/ARMBaseInstrInfo.cpp test/CodeGen/ARM/2010-09-21-OptCmpBug.ll

Gabor Greif ggreif at gmail.com
Wed Sep 22 03:50:57 PDT 2010


Hi Evan, All,

my observations inline below...

On Sep 22, 1:49 am, Evan Cheng <evan.ch... at apple.com> wrote:
> Author: evancheng
> Date: Tue Sep 21 18:49:07 2010
> New Revision: 114506
>
> URL:http://llvm.org/viewvc/llvm-project?rev=114506&view=rev
> Log:
> OptimizeCompareInstr should avoid iterating pass the beginning of the MBB when the 'and' instruction is after the comparison.
>
> Added:
>     llvm/trunk/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
> Modified:
>     llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
>
> Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
> URL:http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBase...
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Sep 21 18:49:07 2010
> @@ -1455,7 +1455,8 @@
>
>    // Check that CPSR isn't set between the comparison instruction and the one we
>    // want to change.
> -  MachineBasicBlock::const_iterator I = CmpInstr, E = MI;
> +  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
> +    B = MI->getParent()->begin();
>    --I;
>    for (; I != E; --I) {
>      const MachineInstr &Instr = *I;
> @@ -1469,6 +1470,10 @@
>        if (MO.getReg() == ARM::CPSR)
>          return false;
>      }
> +
> +    if (I == B)
> +      // The 'and' is below the comparison instruction.
> +      return false;
>    }
>
>    // Set the "zero" bit in CPSR.
>
> Added: llvm/trunk/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
> URL:http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-...
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll Tue Sep 21 18:49:07 2010
> @@ -0,0 +1,84 @@
> +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
> +
> +declare noalias i8* @malloc(i32) nounwind
> +
> +define internal void @gl_DrawPixels(i32 %width, i32 %height, i32 %format, i32 %type, i8* %pixels) nounwind {
> +entry:
> +  br i1 undef, label %bb3.i, label %bb3
> +
> +bb3.i:                                            ; preds = %entry
> +  unreachable
> +
> +gl_error.exit:                                    ; preds = %bb22
> +  ret void
> +
> +bb3:                                              ; preds = %entry
> +  br i1 false, label %bb5, label %bb4
> +
> +bb4:                                              ; preds = %bb3
> +  br label %bb5
> +
> +bb5:                                              ; preds = %bb4, %bb3
> +  br i1 undef, label %bb19, label %bb22
> +
> +bb19:                                             ; preds = %bb5
> +  switch i32 %type, label %bb3.i6.i [
> +    i32 5120, label %bb1.i13
> +    i32 5121, label %bb1.i13
> +    i32 6656, label %bb9.i.i6
> +  ]
> +
> +bb9.i.i6:                                         ; preds = %bb19
> +  br label %bb1.i13
> +
> +bb3.i6.i:                                         ; preds = %bb19
> +  unreachable
> +
> +bb1.i13:                                          ; preds = %bb9.i.i6, %bb19, %bb19
> +  br i1 undef, label %bb3.i17, label %bb2.i16
> +
> +bb2.i16:                                          ; preds = %bb1.i13
> +  unreachable
> +
> +bb3.i17:                                          ; preds = %bb1.i13
> +  br i1 undef, label %bb4.i18, label %bb23.i
> +
> +bb4.i18:                                          ; preds = %bb3.i17
> +  %0 = mul nsw i32 %height, %width
> +  %1 = and i32 %0, 7
> +  %not..i = icmp ne i32 %1, 0
> +  %2 = zext i1 %not..i to i32
> +  %storemerge2.i = add i32 0, %2
> +  %3 = call noalias i8* @malloc(i32 %storemerge2.i) nounwind
> +  br i1 undef, label %bb3.i9, label %bb9.i

This seems to instruction select to:

BB#10: derived from LLVM BB %bb4.i18
    Predecessors according to CFG: BB#9
        %reg16402<def> = COPY %reg16385; rGPR:%reg16402 GPR:%reg16385
        %reg16403<def> = COPY %reg16384; rGPR:%reg16403 GPR:%reg16384
        %reg16401<def> = t2MUL %reg16402, %reg16403, pred:14, pred:
%reg0; rGPR:%reg16401,16402,16403
        %reg16404<def> = t2MOVi 0, pred:14, pred:%reg0, opt:%reg0;
rGPR:%reg16404
        t2TSTri %reg16401, 7, pred:14, pred:%reg0, %CPSR<imp-def>;
rGPR:%reg16401
        %reg16405<def> = t2MOVCCi %reg16404, 1, pred:1, pred:%CPSR;
rGPR:%reg16405,16404
        ADJCALLSTACKDOWN 0, pred:14, pred:%reg0, %SP<imp-def,dead>,
%SP<imp-use>
        %R0<def> = COPY %reg16405; rGPR:%reg16405
        tBLXi_r9 <ga:@malloc>, %R0, %R0<imp-def>, %R1<imp-def,dead>,
%R3<imp-def,dead>, %CPSR<imp-def,dead>, ...
        ADJCALLSTACKUP 0, 0, pred:14, pred:%reg0, %SP<imp-def,dead>,
%SP<imp-use>
        %reg16407<def> = t2ANDri %reg16401, 7, pred:14, pred:%reg0,
opt:%reg0; rGPR:%reg16407,16401
        %reg16387<def> = COPY %reg16407; GPR:%reg16387 rGPR:%reg16407
        %reg16408<def> = IMPLICIT_DEF; GPR:%reg16408
        t2CMPzri %reg16408<kill>, 0, pred:14, pred:%reg0, %CPSR<imp-
def>; GPR:%reg16408
        t2Bcc <BB#18>, pred:1, pred:%CPSR
    Successors according to CFG: BB#18 BB#11

The TST and the AND indeed both consume %reg16401 and the AND is below
the TST. Your fix is correct to bail out at the start of the BB.

But I see a missed opportunity here, and I'd like to understand
whether our arsenal is good enough to recognize it.

Finally the above ends up to output:

@ BB#4:                                 @ %bb.i24.i.preheader
        and     r0, r4, #7
        movs    r1, #0        ;; why movS? can we change to mov?
        cmp     r0, #0        ;; equivalent to tst r0, #7, elidable
        it      ne
        movne   r1, #1
        b       LBB0_6

But I guess the optimal sequence would be:

@ BB#4:                                 @ %bb.i24.i.preheader
        andS     r0, r4, #7
        ite     ne
        movne   r1, #1
        moveq   r1, #0  ;; or perhaps equivalently: moveq r1, r0
        b       LBB0_6

Please correct me if I missed something :-)

Anyway, to get here, I must understand a few things:

        %reg16387<def> = COPY %reg16407; GPR:%reg16387 rGPR:%reg16407

1) Why is this copy present at all?

        %reg16408<def> = IMPLICIT_DEF; GPR:%reg16408

2) What is defined implicitly here?

3) Third question: which MachineInst is responsible for the "movs r1,
#0" above?

Hopefully someone can explain this all to me ;-)

Cheers,

   Gabor


> +
> +bb9.i:                                            ; preds = %bb4.i18
> +  br i1 undef, label %bb13.i19, label %bb.i24.i
> +
> +bb13.i19:                                         ; preds = %bb9.i
> +  br i1 undef, label %bb14.i20, label %bb15.i
> +
> +bb14.i20:                                         ; preds = %bb13.i19
> +  unreachable
> +
> +bb15.i:                                           ; preds = %bb13.i19
> +  unreachable
> +
> +bb.i24.i:                                         ; preds = %bb.i24.i, %bb9.i
> +  %storemerge1.i21.i = phi i32 [ %4, %bb.i24.i ], [ 0, %bb9.i ]
> +  %4 = add i32 %storemerge1.i21.i, 1
> +  %exitcond47.i = icmp eq i32 %4, %storemerge2.i
> +  br i1 %exitcond47.i, label %bb22, label %bb.i24.i
> +
> +bb23.i:                                           ; preds = %bb3.i17
> +  unreachable
> +
> +bb3.i9:                                           ; preds = %bb4.i18
> +  unreachable
> +
> +bb22:                                             ; preds = %bb.i24.i, %bb5
> +  br i1 undef, label %gl_error.exit, label %bb23
> +
> +bb23:                                             ; preds = %bb22
> +  ret void
> +}
>
> _______________________________________________
> llvm-commits mailing list
> llvm-comm... at cs.uiuc.eduhttp://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list