[llvm] a9ad65a - [PowerPC] Change default for unaligned FP access for older subtargets

Sat Dec 28 09:41:49 PST 2019

Author: Nemanja Ivanovic
Date: 2019-12-28T11:20:52-06:00
New Revision: a9ad65a2b34f9cbcd207114caa862ef2dc4553c8

URL: https://github.com/llvm/llvm-project/commit/a9ad65a2b34f9cbcd207114caa862ef2dc4553c8
DIFF: https://github.com/llvm/llvm-project/commit/a9ad65a2b34f9cbcd207114caa862ef2dc4553c8.diff

LOG: [PowerPC] Change default for unaligned FP access for older subtargets

This is a fix for https://bugs.llvm.org/show_bug.cgi?id=40554

Some CPUs trap to the kernel on unaligned floating point access, and there are
kernels that do not handle the interrupt. According to the PR, the program then
fails with a SIGBUS. This just switches the default for unaligned FP access to
only allow it on recent server CPUs that are known to support it.

Differential revision: https://reviews.llvm.org/D71954

Added: 
    llvm/test/CodeGen/PowerPC/unaligned-floats.ll

Modified: 
    llvm/lib/Target/PowerPC/PPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.h
    llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 1d5396912ef0..266b5bf1ba5f 100644

--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
                                   "Enable Hardware Transactional Memory instructions">;
 def FeatureMFTB   : SubtargetFeature<"", "FeatureMFTB", "true",
                                         "Implement mftb using the mfspr instruction">;
+def FeatureUnalignedFloats :
+  SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
+                   "true", "CPU does not trap on unaligned FP access">;
 def FeaturePPCPreRASched:
   SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
                    "Use PowerPC pre-RA scheduling strategy">;
@@ -252,7 +255,8 @@ def ProcessorFeatures {
                                                   FeatureExtDiv,
                                                   FeatureMFTB,
                                                   DeprecatedDST,
-                                                  FeatureTwoConstNR];
+                                                  FeatureTwoConstNR,
+                                                  FeatureUnalignedFloats];
   list<SubtargetFeature> P7SpecificFeatures = [];
   list<SubtargetFeature> P7Features =
     !listconcat(P7InheritableFeatures, P7SpecificFeatures);

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa0b9a0b7af1..e6969ca87507 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15131,6 +15131,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   if (!VT.isSimple())
     return false;
 
+  if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+    return false;
+
   if (VT.getSimpleVT().isVector()) {
     if (Subtarget.hasVSX()) {
       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 6dff0c126ab5..044e982740e9 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -124,6 +124,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool IsPPC4xx;
   bool IsPPC6xx;
   bool FeatureMFTB;
+  bool AllowsUnalignedFPAccess;
   bool DeprecatedDST;
   bool HasLazyResolverStubs;
   bool IsLittleEndian;
@@ -274,6 +275,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
   bool isE500() const { return IsE500; }
   bool isFeatureMFTB() const { return FeatureMFTB; }
+  bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
   bool isDeprecatedDST() const { return DeprecatedDST; }
   bool hasICBT() const { return HasICBT; }
   bool hasInvariantFunctionDescriptors() const {

diff  --git a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 86efa0217b6b..41ef5ccd0f96 100644
--- a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,4 +1,5 @@
-; RUN: llc -verify-machineinstrs -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mattr=-vsx \
+; RUN:   -mattr=+allow-unaligned-fp-access < %s | FileCheck %s
 ; ModuleID = 'foo.c'
 
 target triple = "powerpc-unknown-linux-gnu"

diff  --git a/llvm/test/CodeGen/PowerPC/unaligned-floats.ll b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll
new file mode 100644
index 000000000000..af7d36ac1d09
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc-- \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=allow-unaligned-fp-access -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc-- -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc-- \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=UNALIGN
+
+; Test case as provided by author in https://bugs.llvm.org/show_bug.cgi?id=40554
+%struct.anon = type { i32, [5 x i8] }
+
+@s = dso_local local_unnamed_addr global %struct.anon { i32 0, [5 x i8] c"\00B\F6\E9y" }, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%g\0A\00", align 1
+; Function Attrs: nofree nounwind
+define dso_local i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:       lfs f1, 5(r3)
+; CHECK:       blr
+;
+; UNALIGN-LABEL: main:
+; UNALIGN:       lfs f1, 12(r1)
+; UNALIGN:       blr
+entry:
+  %0 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 1), align 1
+  %conv = zext i8 %0 to i32
+  %shl = shl nuw i32 %conv, 24
+  %1 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 2), align 2
+  %conv1 = zext i8 %1 to i32
+  %shl2 = shl nuw nsw i32 %conv1, 16
+  %add = or i32 %shl2, %shl
+  %2 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 3), align 1
+  %conv3 = zext i8 %2 to i32
+  %shl4 = shl nuw nsw i32 %conv3, 8
+  %add5 = or i32 %add, %shl4
+  %3 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 4), align 4
+  %conv6 = zext i8 %3 to i32
+  %add7 = or i32 %add5, %conv6
+  %4 = bitcast i32 %add7 to float
+  %conv8 = fpext float %4 to double
+  %call = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv8)
+  ret i32 0
+}
+; Function Attrs: nofree nounwind
+declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr