[llvm] a9ad65a - [PowerPC] Change default for unaligned FP access for older subtargets
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 28 09:41:49 PST 2019
Author: Nemanja Ivanovic
Date: 2019-12-28T11:20:52-06:00
New Revision: a9ad65a2b34f9cbcd207114caa862ef2dc4553c8
URL: https://github.com/llvm/llvm-project/commit/a9ad65a2b34f9cbcd207114caa862ef2dc4553c8
DIFF: https://github.com/llvm/llvm-project/commit/a9ad65a2b34f9cbcd207114caa862ef2dc4553c8.diff
LOG: [PowerPC] Change default for unaligned FP access for older subtargets
This is a fix for https://bugs.llvm.org/show_bug.cgi?id=40554
Some CPUs trap to the kernel on unaligned floating point access and there are
kernels that do not handle the interrupt. The program then fails with a SIGBUS
according to the PR. This just switches the default for unaligned access to only
allow it on recent server CPUs that are known to allow this.
Differential revision: https://reviews.llvm.org/D71954
Added:
llvm/test/CodeGen/PowerPC/unaligned-floats.ll
Modified:
llvm/lib/Target/PowerPC/PPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCSubtarget.h
llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 1d5396912ef0..266b5bf1ba5f 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureUnalignedFloats :
+ SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
+ "true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
"Use PowerPC pre-RA scheduling strategy">;
@@ -252,7 +255,8 @@ def ProcessorFeatures {
FeatureExtDiv,
FeatureMFTB,
DeprecatedDST,
- FeatureTwoConstNR];
+ FeatureTwoConstNR,
+ FeatureUnalignedFloats];
list<SubtargetFeature> P7SpecificFeatures = [];
list<SubtargetFeature> P7Features =
!listconcat(P7InheritableFeatures, P7SpecificFeatures);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa0b9a0b7af1..e6969ca87507 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15131,6 +15131,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
+ if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ return false;
+
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 6dff0c126ab5..044e982740e9 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -124,6 +124,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
bool IsPPC4xx;
bool IsPPC6xx;
bool FeatureMFTB;
+ bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -274,6 +275,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
+ bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {
diff --git a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 86efa0217b6b..41ef5ccd0f96 100644
--- a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,4 +1,5 @@
-; RUN: llc -verify-machineinstrs -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mattr=-vsx \
+; RUN: -mattr=+allow-unaligned-fp-access < %s | FileCheck %s
; ModuleID = 'foo.c'
target triple = "powerpc-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/PowerPC/unaligned-floats.ll b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll
new file mode 100644
index 000000000000..af7d36ac1d09
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc-- \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=allow-unaligned-fp-access -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc-- -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc-- \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=UNALIGN
+
+; Test case as provided by author in https://bugs.llvm.org/show_bug.cgi?id=40554
+%struct.anon = type { i32, [5 x i8] }
+
+@s = dso_local local_unnamed_addr global %struct.anon { i32 0, [5 x i8] c"\00B\F6\E9y" }, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%g\0A\00", align 1
+; Function Attrs: nofree nounwind
+define dso_local i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK: lfs f1, 5(r3)
+; CHECK: blr
+;
+; UNALIGN-LABEL: main:
+; UNALIGN: lfs f1, 12(r1)
+; UNALIGN: blr
+entry:
+ %0 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 1), align 1
+ %conv = zext i8 %0 to i32
+ %shl = shl nuw i32 %conv, 24
+ %1 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 2), align 2
+ %conv1 = zext i8 %1 to i32
+ %shl2 = shl nuw nsw i32 %conv1, 16
+ %add = or i32 %shl2, %shl
+ %2 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 3), align 1
+ %conv3 = zext i8 %2 to i32
+ %shl4 = shl nuw nsw i32 %conv3, 8
+ %add5 = or i32 %add, %shl4
+ %3 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 4), align 4
+ %conv6 = zext i8 %3 to i32
+ %add7 = or i32 %add5, %conv6
+ %4 = bitcast i32 %add7 to float
+ %conv8 = fpext float %4 to double
+ %call = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv8)
+ ret i32 0
+}
+; Function Attrs: nofree nounwind
+declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr
More information about the llvm-commits
mailing list