[llvm] r263431 - [SystemZ] Avoid LER on z13 due to partial register dependencies

Mon Mar 14 06:50:04 PDT 2016

Author: uweigand
Date: Mon Mar 14 08:50:03 2016
New Revision: 263431

URL: http://llvm.org/viewvc/llvm-project?rev=263431&view=rev
Log:
[SystemZ] Avoid LER on z13 due to partial register dependencies

On the z13, it turns out to be more efficient to access a full
floating-point register than just the upper half (as done, e.g.,
by the LE and LER instructions), since accessing only part of the
register introduces a partial register dependency.

The current code already takes this into account when loading from
memory by using the LDE instruction in place of LE.  However,
we still generate LER for register-to-register copies, which shows
the same performance issues as LE in certain circumstances.

This patch changes the back-end to emit LDR instead of LER to
implement FP32 register-to-register copies on z13.


Added:
    llvm/trunk/test/CodeGen/SystemZ/fp-move-12.ll
Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td
    llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/trunk/test/CodeGen/SystemZ/fp-move-01.ll
    llvm/trunk/test/CodeGen/SystemZ/vec-sub-01.ll

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td?rev=263431&r1=263430&r2=263431&view=diff
==============================================================================

--- llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td Mon Mar 14 08:50:03 2016
@@ -37,6 +37,10 @@ let hasSideEffects = 0 in {
   def LER : UnaryRR <"le", 0x38,   null_frag, FP32,  FP32>;
   def LDR : UnaryRR <"ld", 0x28,   null_frag, FP64,  FP64>;
   def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
+
+  // For z13 we prefer LDR over LER to avoid partial register dependencies.
+  let isCodeGenOnly = 1 in
+    def LDR32 : UnaryRR<"ld", 0x28, null_frag, FP32, FP32>;
 }
 
 // Moves between two floating-point registers that also set the condition

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp?rev=263431&r1=263430&r2=263431&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp Mon Mar 14 08:50:03 2016
@@ -572,7 +572,8 @@ void SystemZInstrInfo::copyPhysReg(Machi
   if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LGR;
   else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
-    Opcode = SystemZ::LER;
+    // For z13 we prefer LDR over LER to avoid partial register dependencies.
+    Opcode = STI.hasVector() ? SystemZ::LDR32 : SystemZ::LER;
   else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
     Opcode = SystemZ::LDR;
   else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))

Modified: llvm/trunk/test/CodeGen/SystemZ/fp-move-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/fp-move-01.ll?rev=263431&r1=263430&r2=263431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/fp-move-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/fp-move-01.ll Mon Mar 14 08:50:03 2016
@@ -1,7 +1,6 @@
 ; Test moves between FPRs.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32 moves.
 define float @f1(float %a, float %b) {

Added: llvm/trunk/test/CodeGen/SystemZ/fp-move-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/fp-move-12.ll?rev=263431&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/fp-move-12.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/fp-move-12.ll Mon Mar 14 08:50:03 2016
@@ -0,0 +1,33 @@
+; Test moves between FPRs on z13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that we use LDR instead of LER.
+define float @f1(float %a, float %b) {
+; CHECK-LABEL: f1:
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  ret float %b
+}
+
+; Test f64 moves.
+define double @f2(double %a, double %b) {
+; CHECK-LABEL: f2:
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  ret double %b
+}
+
+; Test f128 moves.  Since f128s are passed by reference, we need to force
+; a copy by other means.
+define void @f3(fp128 *%x) {
+; CHECK-LABEL: f3:
+; CHECK: lxr
+; CHECK: axbr
+; CHECK: br %r14
+  %val = load volatile fp128 , fp128 *%x
+  %sum = fadd fp128 %val, %val
+  store volatile fp128 %sum, fp128 *%x
+  store volatile fp128 %val, fp128 *%x
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/vec-sub-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-sub-01.ll?rev=263431&r1=263430&r2=263431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-sub-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-sub-01.ll Mon Mar 14 08:50:03 2016
@@ -52,7 +52,7 @@ define <4 x float> @f5(<4 x float> %val1
 ; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
 ; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
 ; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
-; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]]
+; CHECK-DAG: ldr %f[[A1copy:[0-5]]], %f[[A1]]
 ; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
 ; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
 ; CHECK-DAG: sebr %f[[C1]], %f[[C2]]