[llvm-bugs] [Bug 25548] New: [ppc] unnecessary cr instructions and isel instruction

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Nov 16 16:15:08 PST 2015


https://llvm.org/bugs/show_bug.cgi?id=25548

            Bug ID: 25548
           Summary: [ppc] unnecessary cr instructions and isel instruction
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: PowerPC
          Assignee: unassignedbugs at nondot.org
          Reporter: carrot at google.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

The following function is from 453.povray

// Walks the sibling list that starts at Bounding_Object and checks Ray
// against every entry. Returns false at the first entry for which both
// Intersection() and Inside_Object() report failure; returns true when the
// whole list has been walked (including when Bounding_Object is NULL).
bool Ray_In_Bound (RAY *Ray, OBJECT *Bounding_Object)
{
  OBJECT *Bound;          // current entry of the sibling list
  INTERSECTION Local;     // passed by address to Intersection(); not read here

  for (Bound = Bounding_Object; Bound != NULL; Bound = Bound->Sibling)
  {
    // Counted once per entry visited, before either check runs.
    Increase_Counter(stats[Bounding_Region_Tests]);

    if (!Intersection (&Local, Bound, Ray))
    {    
      // Inside_Object() is consulted only when Intersection() fails.
      if (!Inside_Object(Ray->Initial, Bound))
      {    
        return (false);
      }    
    }    
    // Reached when either check succeeded for this entry.
    Increase_Counter(stats[Bounding_Region_Tests_Succeeded]);
  }

  return (true);
}

When compiled with options -O2 -m64 -mvsx -mcpu=power8, I got the following code:

#-----------------------------------------------------------------------
# bool pov::Ray_In_Bound(RAY *Ray, OBJECT *Bounding_Object)
# Compiler output (-O2 -m64 -mvsx -mcpu=power8), reproduced as evidence.
# In:   r3 = Ray, r4 = Bounding_Object
# Out:  r3 = 1 if CR bit 8 is set at %cleanup, otherwise 0
# Regs: r29 = Ray, r30 = Bound (current list entry),
#       r28 = r31 + 96 (first argument passed to Intersection),
#       r27 = value loaded from TOC entry .LC0 (base of the two counters
#             updated at offsets 704 and 712),
#       r31 = copy of r1 used as frame base
# CR:   bit 8 carries the return value, bit 9 holds a constant 0
# Markers A..F are the bug reporter's labels for the instructions
# discussed in the report text.
#-----------------------------------------------------------------------
_ZN3pov12Ray_In_BoundEPNS_10Ray_StructEPNS_13Object_StructE: # @_ZN3pov12Ray_In_BoundEPNS_10Ray_StructEPNS_13Object_StructE
.Lfunc_begin1:
        .cfi_startproc
.Ltmp12:
        addis 2, 12, .TOC.-.Ltmp12@ha
        addi 2, 2, .TOC.-.Ltmp12@l
.Ltmp13:
        .localentry _ZN3pov12Ray_In_BoundEPNS_10Ray_StructEPNS_13Object_StructE, .Ltmp13-.Ltmp12
# BB#0:                                 # %entry
        mfcr 12                         # CR is saved because bits 8/9 are written below
        mflr 0
        std 31, -8(1)
        std 0, 16(1)
        stw 12, 8(1)
        stdu 1, -336(1)
        mr 31, 1
        creqv 8, 8, 8                   # A: CR bit 8 = 1 (result starts as true)
        std 29, 312(31)                 # 8-byte Folded Spill
        std 30, 320(31)                 # 8-byte Folded Spill
        mr 30, 4                        # r30 = Bound = Bounding_Object
        mr 29, 3                        # r29 = Ray
        std 27, 296(31)                 # 8-byte Folded Spill
        std 28, 304(31)                 # 8-byte Folded Spill
        cmpldi   30, 0
        beq      0, .LBB1_6             # empty list: return with bit 8 still 1
# BB#1:                                 # %for.body.lr.ph
        addis 3, 2, .LC0@toc@ha
        addi 28, 31, 96
        crxor 9, 9, 9                   # B: CR bit 9 = 0 (source for C)
        ld 27, .LC0@toc@l(3)
        .align  4
.LBB1_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        ld 3, 704(27)                   # first counter += 1
        mr 4, 30
        mr 5, 29
        addi 3, 3, 1 
        std 3, 704(27)
        mr 3, 28
        bl _ZN3pov12IntersectionEPNS_10istk_entryEPNS_13Object_StructEPNS_10Ray_StructE
        andi. 3, 3, 1 
        bc 12, 1, .LBB1_4               # low bit of the result set: skip Inside_Object
# BB#3:                                 # %if.then
                                        #   in Loop: Header=BB1_2 Depth=1
        mr 3, 29
        mr 4, 30
        bl _ZN3pov13Inside_ObjectEPdPNS_13Object_StructE
        cmpldi   3, 0
        beq      0, .LBB1_5             # both checks failed: return false
.LBB1_4:                                # %if.end.3
                                        #   in Loop: Header=BB1_2 Depth=1
        ld 3, 712(27)                   # second counter += 1
        addi 3, 3, 1
        std 3, 712(27)
        ori 2, 2, 0
        ld 30, 16(30)                   # Bound = next list entry
        cmpldi   30, 0
        bne      0, .LBB1_2
        b .LBB1_6
.LBB1_5:
        crmove   8, 9                   # C: CR bit 8 = bit 9 = 0 (result false)
.LBB1_6:                                # %cleanup
        li 3, 0                         # D: candidate result 0
        li 4, 1                         # E: candidate result 1
        ld 30, 320(31)                  # 8-byte Folded Reload
        ld 29, 312(31)                  # 8-byte Folded Reload
        ld 28, 304(31)                  # 8-byte Folded Reload
        ld 27, 296(31)                  # 8-byte Folded Reload
        isel 3, 4, 3, 8                 # F: r3 = (CR bit 8) ? r4 : r3
        addi 1, 1, 336
        ld 0, 16(1)
        lwz 12, 8(1)
        ld 31, -8(1)
        mtocrf 32, 12                   # restore the CR field selected by mask 32 (cr2)
        mtlr 0
        blr


Instructions A, B, and C use condition register bits to represent the result,
and instructions D, E, and F set the actual result according to the CR value.

We can use simpler instructions: instruction A can set reg 8 to 1 and
instruction C can set reg 8 to 0, after which instructions B, D, E, and F can
be removed. Note that this removes all of the slow CR manipulation instructions
as well as the isel.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20151117/49575a6b/attachment.html>


More information about the llvm-bugs mailing list