[llvm] 88c1cd8 - [SystemZ] Use STDY/STEY/LDY/LEY for VR32/VR64 in eliminateFrameIndex().

Wed Jun 8 08:10:44 PDT 2022

Author: Jonas Paulsson
Date: 2022-06-08T17:10:31+02:00
New Revision: 88c1cd86eefc2e7e597b44156f3699931d8df9fd

URL: https://github.com/llvm/llvm-project/commit/88c1cd86eefc2e7e597b44156f3699931d8df9fd
DIFF: https://github.com/llvm/llvm-project/commit/88c1cd86eefc2e7e597b44156f3699931d8df9fd.diff

LOG: [SystemZ] Use STDY/STEY/LDY/LEY for VR32/VR64 in eliminateFrameIndex().

When a VR32 or VR64 register is spilled to (or reloaded from) a stack slot
that requires a long (20-bit) displacement, the corresponding FP opcode
(STEY/STDY/LEY/LDY) can be used if the allocated physical register is also an
FP register. This eliminates the need for a separate LAY instruction.

Reviewed By: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D115406

Added: 
    llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZInstrInfo.h
    llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5830d001085af..1436be1e40523 100644

--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1632,7 +1632,8 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
 }
 
 unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
-                                              int64_t Offset) const {
+                                              int64_t Offset,
+                                              const MachineInstr *MI) const {
   const MCInstrDesc &MCID = get(Opcode);
   int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
   if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
@@ -1654,6 +1655,24 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
     // Check whether Opcode allows signed 20-bit displacements.
     if (MCID.TSFlags & SystemZII::Has20BitOffset)
       return Opcode;
+
+    // If a VR32/VR64 reg ended up in an FP register, use the FP opcode.
+    if (MI && MI->getOperand(0).isReg()) {
+      Register Reg = MI->getOperand(0).getReg();
+      if (Reg.isPhysical() && SystemZMC::getFirstReg(Reg) < 16) {
+        switch (Opcode) {
+        case SystemZ::VL32:
+          return SystemZ::LEY;
+        case SystemZ::VST32:
+          return SystemZ::STEY;
+        case SystemZ::VL64:
+          return SystemZ::LDY;
+        case SystemZ::VST64:
+          return SystemZ::STDY;
+        default: break;
+        }
+      }
+    }
   }
   return 0;
 }

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 9e5b2729a7072..48183dc3f332e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -309,8 +309,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
   // and the caller wants to perform that instruction's operation on an
   // address that has displacement Offset.  Return the opcode of a suitable
   // instruction (which might be Opcode itself) or 0 if no such instruction
-  // exists.
-  unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+  // exists.  MI may be passed in order to allow examination of physical
+  // register operands (i.e. if a VR32/64 reg ended up as an FP or Vector reg).
+  unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset,
+                              const MachineInstr *MI = nullptr) const;
 
   // Return true if Opcode has a mapping in 12 <-> 20 bit displacements.
   bool hasDisplacementPairInsn(unsigned Opcode) const;

diff  --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 48cec176b0069..bbc46ff3412f8 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -321,7 +321,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   // See if the offset is in range, or if an equivalent instruction that
   // accepts the offset exists.
   unsigned Opcode = MI->getOpcode();
-  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+  unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset, &*MI);
   if (OpcodeForOffset) {
     if (OpcodeForOffset == SystemZ::LE &&
         MF.getSubtarget<SystemZSubtarget>().hasVector()) {

diff  --git a/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll b/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll
new file mode 100644
index 0000000000000..50a8ada39abe5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/elim-frame-index-VR.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+;
+; Test that a spill/reload of a VR32/VR64 reg uses the FP opcode supporting
+; 20-bit displacement if needed and possible.
+
+define void @f1(i32 %arg, ...)  {
+; CHECK-LABEL: f1:
+; CHECK-NOT: lay
+; CHECK: stdy %f0, 4400(%r15)
+bb:
+  %i = alloca [4096 x i8]
+  ret void
+}
+
+define void @f2(float %Arg) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: lay
+; CHECK: stey %f0, 4172(%r15)
+bb:
+  %i = alloca [1000 x float]
+  %i2 = getelementptr inbounds [1000 x float], [1000 x float]* %i, i64 0, i64 999
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  store float %Arg , float* %i2
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @f3(double* %Dst) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: lay
+; CHECK: ldy %f0, 4168(%r15)
+bb:
+  %i = alloca [500 x double]
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  %i12 = getelementptr inbounds [500 x double], [500 x double]* %i, i64 0, i64 499
+  %i13 = load double, double* %i12
+  %i14 = fdiv double %i13, 0.000000e+00
+  store double %i14, double* %Dst
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @f4(float* %Dst) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: lay
+; CHECK: ley %f0, 4172(%r15)
+bb:
+  %i = alloca [1000 x float]
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  %i12 = getelementptr inbounds [1000 x float], [1000 x float]* %i, i64 0, i64 999
+  %i13 = load float, float* %i12
+  %i14 = fdiv float %i13, 0.000000e+00
+  store float %i14, float* %Dst
+  br label %bb3
+
+bb3:
+  ret void
+}