[PATCH] D54583: PowerPC: Optimize SPE double parameter calling setup

vit9696 via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 23 05:13:11 PST 2019


vit9696 added a comment.

Hmm, I have not yet tried to investigate this, but I have a feeling that a regression appeared somewhere during the patch iterations, either in this patch or in D54409 <https://reviews.llvm.org/D54409>.
At this point I am consistently getting weird generated instructions for __floatundidf from compiler-rt (compiling with freebsd & -O3), yet I have a correct version in my files.
What makes it strange is that the logs show that the correct example and the first incorrect one were generated by the same compiler (the same binary file) with the same flags, yet I can no longer get the correct one.

Does it reproduce for anyone?

Supposedly correct one:

  .set back_chain, -0x20
  .set var_10, -0x10
  .set var_8, -8
  94 21 FF E0                       stwu      r1, back_chain(r1)
  3C A0 45 30                       lis       r5, 0x4530
  90 61 00 1C                       stw       r3, 0x20+var_8+4(r1)
  90 A1 00 18                       stw       r5, 0x20+var_8(r1)
  3C A0 43 30                       lis       r5, 0x4330
  10 61 1B 01                       evldd     r3, 0x20+var_8(r1)
  90 81 00 14                       stw       r4, 0x20+var_10+4(r1)
  3C 80 80 02                       lis       r4, -0x7FFE
  10 84 DB 01                       evldd     r4, 0xD8(r4) ; note this one
  90 A1 00 10                       stw       r5, 0x20+var_10(r1)
  10 A1 13 01                       evldd     r5, 0x20+var_10(r1)
  10 63 22 E0                       efdadd    r3, r3, r4
  10 83 2A E0                       efdadd    r4, r3, r5
  10 64 22 2C                       evmergehi r3, r4, r4
  38 21 00 20                       addi      r1, r1, 0x20
  4E 80 00 20                       blr

What I get now:

  94 21 FF E0                       stwu      r1, back_chain(r1)
  3C A0 45 30                       lis       r5, 0x4530
  90 61 00 1C                       stw       r3, 0x20+var_8+4(r1)
  90 A1 00 18                       stw       r5, 0x20+var_8(r1)
  3C A0 43 30                       lis       r5, 0x4330
  10 61 1B 01                       evldd     r3, 0x20+var_8(r1)
  90 81 00 14                       stw       r4, 0x20+var_10+4(r1)
  3C 80 80 02                       lis       r4, -0x7FFE
  10 8C BB 01                       evldd     r4, 0xB8(r12) ; note this one
  90 A1 00 10                       stw       r5, 0x20+var_10(r1)
  10 A1 13 01                       evldd     r5, 0x20+var_10(r1)
  10 63 22 E0                       efdadd    r3, r3, r4
  10 83 2A E0                       efdadd    r4, r3, r5
  10 64 22 2C                       evmergehi r3, r4, r4
  38 21 00 20                       addi      r1, r1, 0x20
  4E 80 00 20                       blr

or

  94 21 FF E0                       stwu      r1, back_chain(r1)
  3C A0 45 30                       lis       r5, 0x4530
  90 61 00 1C                       stw       r3, 0x20+var_8+4(r1)
  90 A1 00 18                       stw       r5, 0x20+var_8(r1)
  3C A0 43 30                       lis       r5, 0x4330
  10 61 1B 01                       evldd     r3, 0x20+var_8(r1)
  90 81 00 14                       stw       r4, 0x20+var_10+4(r1)
  3C 80 80 01                       lis       r4, -0x7FFF
  10 9F 1B 01                       evldd     r4, 0x18(r31) ; note this one
  90 A1 00 10                       stw       r5, 0x20+var_10(r1)
  10 A1 13 01                       evldd     r5, 0x20+var_10(r1)
  10 63 22 E0                       efdadd    r3, r3, r4
  10 83 2A E0                       efdadd    r4, r3, r5
  10 64 22 2C                       evmergehi r3, r4, r4
  38 21 00 20                       addi      r1, r1, 0x20
  4E 80 00 20                       blr

Reference source:

  /*
   * Convert an unsigned 64-bit integer to double using only bit operations
   * and double-precision add/subtract (no integer-to-float instruction).
   *
   * The explanation below assumes double is IEEE-754 binary64 (52-bit
   * mantissa field) and that the arithmetic is evaluated in double precision.
   *
   * a: the unsigned 64-bit value to convert.
   * Returns: a as a double; the only rounding happens in the final addition.
   */
  double floatundidf(unsigned long long a)
  {
      // Magic constants: their exponents position the mantissa LSB at weight
      // 2^0 (twop52) and 2^32 (twop84) respectively.
      static const double twop52 = 4503599627370496.0; // 0x1.0p52
      static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
      // 2^84 + 2^52: removes both biases with a single subtraction.
      static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
  
      // Unions give access to the raw bit pattern of each double.
      union { uint64_t x; double d; } high = { .d = twop84 };
      union { uint64_t x; double d; } low = { .d = twop52 };
  
      // OR the upper 32 bits of a into the low mantissa bits of 2^84:
      // high.d becomes 2^84 + (a >> 32) * 2^32.
      high.x |= a >> 32;
      // OR the lower 32 bits of a into the low mantissa bits of 2^52:
      // low.d becomes 2^52 + (a & 0xffffffff).
      low.x |= a & UINT64_C(0x00000000ffffffff);
  
      // Operation order matters: the subtraction yields (a >> 32) * 2^32 - 2^52,
      // which is a multiple of 2^32 below 2^64 in magnitude and so is exact;
      // adding low.d then cancels the remaining -2^52 and rounds once to a.
      const double result = (high.d - twop84_plus_twop52) + low.d;
      return result;
  }


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D54583/new/

https://reviews.llvm.org/D54583





More information about the llvm-commits mailing list