[llvm-bugs] [Bug 27740] New: extra memory load in loop

Fri May 13 14:51:44 PDT 2016

https://llvm.org/bugs/show_bug.cgi?id=27740

            Bug ID: 27740
           Summary: extra memory load in loop
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: carrot at google.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

Compile the following code with trunk LLVM:

$ ~/llvm/obj2/bin/clang --target=powerpc64le-grtev4-linux-gnu -O2
-fno-unroll-loops -c t17.c -save-temps

/* Record type used by the reproducer: two int fields followed by three
 * float fields. */
struct s{
  int f0;    /* tested as the first condition in bar() */
  int f1;    /* updated with += in foo() */
  float f2;  /* updated with -= in foo() */
  float f3;  /* compared and stored in bar() */
  float f4;  /* referenced only by the commented-out alternative in bar() */
};

/* Stores p2->f3 into p1->f3.
 *
 * Both arms of the if/else perform the same store. This shape is deliberate:
 * it is the form for which the report observes extra loads in foo()'s loop.
 * The trailing comment on the else arm records an alternative statement
 * that, per the report, lets LLVM generate the expected code. */
static void bar(struct s *p1, struct s *p2)
{
  if (p1->f0 && p1->f3 < p2->f3)
     p1->f3 = p2->f3;
  else
     p1->f3 = p2->f3;    // p1->f4 = p2->f4;
}

/* Runs the loop body `size` times (not at all when size <= 0).
 *
 * Each iteration adds p2->f1 into p1->f1, subtracts p2->f2 from p1->f2,
 * calls bar(p1, p2), and then advances p2 to the next struct s.
 * p1 is never reassigned, and every store to a struct s field in the loop
 * (including those in bar) goes through p1. */
void foo(struct s *p1, struct s *p2, int size)
{
  for (int i=0; i<size; i++)
  {
    p1->f1 += p2->f1;
    p1->f2 -= p2->f2;
    bar(p1, p2);
    p2++;   /* step to the next element; p1 stays fixed */
  }
}

LLVM generates the following code for the loop body:

.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        lwzu 9, 20(4)          // A1
        lxsspx 1, 3, 7         //    B1
        lwz 10, 4(3)           // A2
        lxsspx 0, 4, 6         //    B2
        add 9, 10, 9           // A3
        stw 9, 4(3)            // A4
        xssubsp 0, 1, 0        //    B3
        stxsspx 0, 3, 7        //    B4
        bc 12, 2, .LBB0_5
# BB#3:                                 # %land.lhs.true.i
                                        #   in Loop: Header=BB0_2 Depth=1
        lxsspx 0, 4, 7
        lxsspx 1, 3, 8
        fcmpu 1, 1, 0
        bge 1, .LBB0_6
# BB#4:                                 # %if.then.i
                                        #   in Loop: Header=BB0_2 Depth=1
        stxsspx 0, 3, 8
        b .LBB0_8
        .p2align        4
.LBB0_5:                                # %entry.if.else_crit_edge.i
                                        #   in Loop: Header=BB0_2 Depth=1
        lwz 9, 8(4)
        b .LBB0_7
        .p2align        4
.LBB0_6:                                # %land.lhs.true.if.else_crit_edge.i
                                        #   in Loop: Header=BB0_2 Depth=1
        xscvdpspn 0, 0
        xxsldwi 0, 0, 0, 3
        mfvsrwz 9, 0
.LBB0_7:                                # %if.else.i
                                        #   in Loop: Header=BB0_2 Depth=1
        stw 9, 12(3)
.LBB0_8:                                # %bar.exit
                                        #   in Loop: Header=BB0_2 Depth=1
        addi 5, 5, -1
        bdnz .LBB0_2

The statement p1->f1 += p2->f1 is translated to A1, A2, A3, A4. The statement
p1->f2 -= p2->f2 is translated to B1, B2, B3, B4. Inside the loop, all stores
go through pointer p1 only. So, although p1 may alias p2, it is safe to keep
copies of p1->f1 and p1->f2 in registers, and A2 and B1 can be removed from
the loop.

In the source code, if I replace one of the "p1->f3 = p2->f3" statements with
"p1->f4 = p2->f4", then LLVM generates the expected optimized code.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160513/83831bf8/attachment.html>