[PATCH] D36388: [X86][SandyBridge] Additional updates to the SNB instructions scheduling information
Dimitry Andric via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 12 12:52:11 PDT 2017
dim added a comment.
In https://reviews.llvm.org/D36388#836337, @RKSimon wrote:
> @dim What effect does this patch have on PR34080? The changes in pr34080.ll aren't looking hopeful
It seems to fix my minimized test case, e.g.:
#include <stdio.h>
/*
 * Checker for the reproducer: prints the three elements of tx in hex-float
 * notation and reports whether the last one has the expected value.
 *
 * tx: array of (at least) three doubles; tx[2] is compared with 0x1.93p+16.
 * Returns 1 ("Bad!") when tx[2] differs from 0x1.93p+16, 0 ("OK") otherwise.
 *
 * Marked noinline so that it stays a separate function called from f().
 */
__attribute__((noinline)) int g(double *tx) {
/* Exact floating-point comparison is intended: any deviation is a failure. */
int bad = (tx[2] != 0x1.93p+16);
printf("tx[0]=%.20a tx[1]=%.20a tx[2]=%.20a: %s\n", tx[0], tx[1], tx[2], bad ? "Bad!" : "OK");
return bad;
}
/*
 * Core of the reproducer: splits the long double z into three double pieces.
 *
 * For each of the first two pieces, the integer part of z (truncated via an
 * int cast) is stored as a double, then the remainder is scaled by
 * 1.6777216e+07 (i.e. 2^24) for the next round. Whatever is left of z after
 * two rounds is stored, converted to double, as the third piece.
 *
 * z: the long double value to split.
 * Returns the result of g(tx): 0 when tx[2] has the expected value, else 1.
 *
 * Marked noinline so that the function body is compiled on its own.
 */
__attribute__((noinline)) int f(long double z) {
int i;
double tx[3];
for (i = 0; i < 2; i++) {
/* long double -> int (truncation toward zero) -> double */
tx[i] = (double)((int)(z));
/* Remove the integer part and scale the remainder up by 2^24. */
z = (z - tx[i]) * 1.6777216e+07;
//printf("z=%.20La\n", z);
}
/* Final remainder, narrowed from long double to double. */
tx[2] = z;
return g(tx);
}
/*
 * Entry point of the reproducer: runs f() on one fixed long double input.
 * The exit status is f()'s result: 0 when g() reported "OK", 1 on "Bad!".
 */
int main(void)
{
return f(0x1.b2f3ee96e7600326p+23L);
}
The resulting assembly is, however, very slightly different from what was generated before https://reviews.llvm.org/rL307529. With trunk https://reviews.llvm.org/rL307528, it gives (only the body of function `f` shown):
fldt 64(%rsp)
fnstcw 6(%rsp)
movzwl 6(%rsp), %eax
movw $3199, 6(%rsp) # imm = 0xC7F
fldcw 6(%rsp)
movw %ax, 6(%rsp)
fistl 8(%rsp)
fldcw 6(%rsp)
cvtsi2sdl 8(%rsp), %xmm0
movsd %xmm0, 32(%rsp)
movsd %xmm0, 24(%rsp)
fsubl 24(%rsp)
flds .LCPI1_0(%rip)
fmul %st(0), %st(1)
fnstcw 4(%rsp)
movzwl 4(%rsp), %eax
movw $3199, 4(%rsp) # imm = 0xC7F
fldcw 4(%rsp)
movw %ax, 4(%rsp)
fxch %st(1)
fistl 12(%rsp)
fldcw 4(%rsp)
xorps %xmm0, %xmm0
cvtsi2sdl 12(%rsp), %xmm0
movsd %xmm0, 40(%rsp)
movsd %xmm0, 16(%rsp)
fsubl 16(%rsp)
fmulp %st(1)
fstpl 48(%rsp)
leaq 32(%rsp), %rdi
callq g
addq $56, %rsp
retq
With trunk https://reviews.llvm.org/rL307529 through https://reviews.llvm.org/rL310782, it gives:
fnstcw 6(%rsp)
movzwl 6(%rsp), %eax
movw $3199, 6(%rsp) # imm = 0xC7F
fldcw 6(%rsp)
fldt 64(%rsp)
movw %ax, 6(%rsp)
fistl 8(%rsp)
fldcw 6(%rsp)
cvtsi2sdl 8(%rsp), %xmm0
movsd %xmm0, 32(%rsp)
movsd %xmm0, 24(%rsp)
fsubl 24(%rsp)
fnstcw 4(%rsp)
flds .LCPI1_0(%rip)
movzwl 4(%rsp), %eax
movw $3199, 4(%rsp) # imm = 0xC7F
fldcw 4(%rsp)
fmul %st(0), %st(1)
movw %ax, 4(%rsp)
fxch %st(1)
fistl 12(%rsp)
fldcw 4(%rsp)
xorps %xmm0, %xmm0
cvtsi2sdl 12(%rsp), %xmm0
movsd %xmm0, 40(%rsp)
movsd %xmm0, 16(%rsp)
fsubl 16(%rsp)
fmulp %st(1)
fstpl 48(%rsp)
leaq 32(%rsp), %rdi
callq g
addq $56, %rsp
retq
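Here the most important change (and the source of errors) is that the `flds .LCPI1_0(%rip)` and `fmul %st(0), %st(1)` are moved apart from each other: the `fmul` now comes after the `fldcw 4(%rsp)` that loads the temporary control word 0xC7F, instead of before the preceding `fnstcw 4(%rsp)`.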
However, applying https://reviews.llvm.org/D36388 to https://reviews.llvm.org/rL310782 results in:
fnstcw 6(%rsp)
fldt 64(%rsp)
movzwl 6(%rsp), %eax
movw $3199, 6(%rsp) # imm = 0xC7F
fldcw 6(%rsp)
movw %ax, 6(%rsp)
fistl 8(%rsp)
fldcw 6(%rsp)
cvtsi2sdl 8(%rsp), %xmm0
movsd %xmm0, 32(%rsp)
movsd %xmm0, 24(%rsp)
fsubl 24(%rsp)
flds .LCPI1_0(%rip)
fnstcw 4(%rsp)
fmul %st(0), %st(1)
movzwl 4(%rsp), %eax
movw $3199, 4(%rsp) # imm = 0xC7F
fldcw 4(%rsp)
movw %ax, 4(%rsp)
fxch %st(1)
fistl 12(%rsp)
fldcw 4(%rsp)
xorps %xmm0, %xmm0
cvtsi2sdl 12(%rsp), %xmm0
movsd %xmm0, 40(%rsp)
movsd %xmm0, 16(%rsp)
fsubl 16(%rsp)
fmulp %st(1)
fstpl 48(%rsp)
leaq 32(%rsp), %rdi
callq g
addq $56, %rsp
retq
So the `flds .LCPI1_0(%rip)` and `fmul %st(0), %st(1)` are now separated only by a `fnstcw 4(%rsp)`, which does not seem to affect the outcome.
Diff between the assembly output of stock https://reviews.llvm.org/rL307528 and that of https://reviews.llvm.org/rL310782 with https://reviews.llvm.org/D36388 applied:
--- pio2n-r307528.s
+++ pio2n-r310782-D36388.s
@@ -53,8 +53,8 @@
subq $56, %rsp
.Lcfi2:
.cfi_def_cfa_offset 64
- fldt 64(%rsp)
fnstcw 6(%rsp)
+ fldt 64(%rsp)
movzwl 6(%rsp), %eax
movw $3199, 6(%rsp) # imm = 0xC7F
fldcw 6(%rsp)
@@ -66,8 +66,8 @@
movsd %xmm0, 24(%rsp)
fsubl 24(%rsp)
flds .LCPI1_0(%rip)
- fmul %st(0), %st(1)
fnstcw 4(%rsp)
+ fmul %st(0), %st(1)
movzwl 4(%rsp), %eax
movw $3199, 4(%rsp) # imm = 0xC7F
fldcw 4(%rsp)
Repository:
rL LLVM
https://reviews.llvm.org/D36388
More information about the llvm-commits
mailing list