[llvm-branch-commits] [llvm] release/19.x: [CalcSpillWeights] Avoid x87 excess precision influencing weight result (PR #102207)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 6 12:46:12 PDT 2024
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102207
Backport c80c09f3e380a0a2b00b36bebf72f43271a564c1
Requested by: @DimitryAndric
From c472fe3ab850b114a4349c602f375ae0294f57c4 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry at andric.com>
Date: Tue, 23 Jul 2024 19:02:36 +0200
Subject: [PATCH] [CalcSpillWeights] Avoid x87 excess precision influencing
weight result
Fixes #99396
The result of `VirtRegAuxInfo::weightCalcHelper` can be influenced by
x87 excess precision, which can result in slightly different register
choices when the compiler is hosted on x86_64 or i386. This leads to
different object file output when cross-compiling to i386, or native.
Similar to 7af3432e22b0, we need to add a `volatile` qualifier to the
local `Weight` variable to force it onto the stack, and avoid the excess
precision. Define `stack_float_t` in `MathExtras.h` for this purpose,
and use it.
(cherry picked from commit c80c09f3e380a0a2b00b36bebf72f43271a564c1)
---
llvm/include/llvm/Support/MathExtras.h | 8 ++++
llvm/lib/CodeGen/CalcSpillWeights.cpp | 11 ++---
llvm/test/CodeGen/X86/pr99396.ll | 56 ++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/pr99396.ll
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 0d0fa826f7bba..e568e42afcf4d 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -770,6 +770,14 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
#endif
}
+/// Type to force float point values onto the stack, so that x86 doesn't add
+/// hidden precision, avoiding rounding differences on various platforms.
+#if defined(__i386__) || defined(_M_IX86)
+using stack_float_t = volatile float;
+#else
+using stack_float_t = float;
+#endif
+
} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 1d767a3484bca..9d8c9119f7719 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <tuple>
@@ -257,7 +258,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
return -1.0f;
}
- float Weight = 1.0f;
+ // Force Weight onto the stack so that x86 doesn't add hidden precision,
+ // similar to HWeight below.
+ stack_float_t Weight = 1.0f;
if (IsSpillable) {
// Get loop info for mi.
if (MI->getParent() != MBB) {
@@ -284,11 +287,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
if (!HintReg)
continue;
- // Force hweight onto the stack so that x86 doesn't add hidden precision,
+ // Force HWeight onto the stack so that x86 doesn't add hidden precision,
// making the comparison incorrectly pass (i.e., 1 > 1 == true??).
- //
- // FIXME: we probably shouldn't use floats at all.
- volatile float HWeight = Hint[HintReg] += Weight;
+ stack_float_t HWeight = Hint[HintReg] += Weight;
if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
CopyHints.insert(CopyHint(HintReg, HWeight));
}
diff --git a/llvm/test/CodeGen/X86/pr99396.ll b/llvm/test/CodeGen/X86/pr99396.ll
new file mode 100644
index 0000000000000..f534d32038c22
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr99396.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=i386-unknown-freebsd -enable-misched -relocation-model=pic | FileCheck %s
+
+@c = external local_unnamed_addr global ptr
+
+declare i32 @fn2() local_unnamed_addr
+
+declare i32 @fn3() local_unnamed_addr
+
+define noundef i32 @fn4() #0 {
+entry:
+ %tmp0 = load i32, ptr @fn4, align 4
+; CHECK: movl fn4@GOT(%ebx), %edi
+; CHECK-NEXT: movl (%edi), %edx
+ %tmp1 = load ptr, ptr @c, align 4
+; CHECK: movl c@GOT(%ebx), %eax
+; CHECK-NEXT: movl (%eax), %esi
+; CHECK-NEXT: testl %esi, %esi
+ %cmp.g = icmp eq ptr %tmp1, null
+ br i1 %cmp.g, label %if.then.g, label %if.end3.g
+
+if.then.g: ; preds = %entry
+ %tmp2 = load i32, ptr inttoptr (i32 1 to ptr), align 4
+ %cmp1.g = icmp slt i32 %tmp2, 0
+ br i1 %cmp1.g, label %if.then2.g, label %if.end3.g
+
+if.then2.g: ; preds = %if.then.g
+ %.g = load volatile i32, ptr null, align 2147483648
+ br label %f.exit
+
+if.end3.g: ; preds = %if.then.g, %entry
+ %h.i.g = icmp eq i32 %tmp0, 0
+ br i1 %h.i.g, label %f.exit, label %while.body.g
+
+while.body.g: ; preds = %if.end3.g, %if.end8.g
+ %buff.addr.019.g = phi ptr [ %incdec.ptr.g, %if.end8.g ], [ @fn4, %if.end3.g ]
+ %g.addr.018.g = phi i32 [ %dec.g, %if.end8.g ], [ %tmp0, %if.end3.g ]
+ %call4.g = tail call i32 @fn3(ptr %tmp1, ptr %buff.addr.019.g, i32 %g.addr.018.g)
+ %cmp5.g = icmp slt i32 %call4.g, 0
+ br i1 %cmp5.g, label %if.then6.g, label %if.end8.g
+
+if.then6.g: ; preds = %while.body.g
+ %call7.g = tail call i32 @fn2(ptr null)
+ br label %f.exit
+
+if.end8.g: ; preds = %while.body.g
+ %dec.g = add i32 %g.addr.018.g, 1
+ %incdec.ptr.g = getelementptr i32, ptr %buff.addr.019.g, i32 1
+ store i64 0, ptr %tmp1, align 4
+ %h.not.g = icmp eq i32 %dec.g, 0
+ br i1 %h.not.g, label %f.exit, label %while.body.g
+
+f.exit: ; preds = %if.end8.g, %if.then6.g, %if.end3.g, %if.then2.g
+ ret i32 0
+}
+
+attributes #0 = { "frame-pointer"="all" "tune-cpu"="generic" }
More information about the llvm-branch-commits
mailing list