[llvm] 95ea50e - [VE] Correct LVLGen (LVL instruction insert pass)

Kazushi Marukawa via llvm-commits llvm-commits@lists.llvm.org
Tue Dec 8 13:33:59 PST 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-12-09T06:33:53+09:00
New Revision: 95ea50e4adf76b75fcc0ad29cacd10642db091a6

URL: https://github.com/llvm/llvm-project/commit/95ea50e4adf76b75fcc0ad29cacd10642db091a6
DIFF: https://github.com/llvm/llvm-project/commit/95ea50e4adf76b75fcc0ad29cacd10642db091a6.diff

LOG: [VE] Correct LVLGen (LVL instruction insert pass)

SX Aurora VE uses an intermediate representation similar to VP as its MIR.
At the hardware level, VE itself uses an individual VL register as its
vector length register, so LLVM needs to insert a load-VL (LVL) instruction
just before a vector instruction whenever the value of VL changes.  The
LVLGen pass generates LVL instructions for this purpose.  A bug in this pass
was pointed out in D91416.  This patch corrects that bug and adds a
regression test.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D92716
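
To make the behavior easier to follow, here is a rough C++ sketch of the
per-block scan that the pass performs after this patch.  This is an
illustrative reconstruction, not the verbatim source: the names getVL,
HasRegForVL, RegForVL, TII, and TRI come from the diff below, while the pass
boilerplate, the local declarations, and the handling of the Changed flag are
assumptions; see llvm/lib/Target/VE/LVLGen.cpp for the real implementation.

    // Simplified, hypothetical sketch of the scan after this patch.
    bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
      bool Changed = false;
      bool HasRegForVL = false; // Do we know which scalar register is in VL?
      unsigned RegForVL = VE::NoRegister;
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
        MachineBasicBlock::iterator MI = I;

        // If MI uses a vector length operand, make sure VL holds that value.
        unsigned Reg = getVL(*MI);
        if (Reg != VE::NoRegister) {
          if (!HasRegForVL || RegForVL != Reg) {
            // VL holds a different value, so load it just before MI.
            BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
            HasRegForVL = true;
            RegForVL = Reg;
            Changed = true;
          }
        }

        // Forget the cached VL whenever the scalar register feeding it is
        // redefined, modified, or killed, or when a call clobbers VL itself.
        if (HasRegForVL &&
            (MI->definesRegister(RegForVL, TRI) ||
             MI->modifiesRegister(RegForVL, TRI) ||
             MI->killsRegister(RegForVL, TRI) || MI->isCall()))
          HasRegForVL = false;

        ++I;
      }
      return Changed;
    }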

Added: 
    

Modified: 
    llvm/lib/Target/VE/LVLGen.cpp
    llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
index 08b350a581dc..c4588926af9e 100644
--- a/llvm/lib/Target/VE/LVLGen.cpp
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -68,6 +68,12 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
     MachineBasicBlock::iterator MI = I;
 
+    // Check whether MI uses a vector length operand.  If so, we prepare the
+    // VL register.  We would like to reuse the VL register as much as possible
+    // and to keep the number of LEA instructions as small as possible.
+    // Therefore, we use a regular scalar register to hold the immediate value
+    // loaded into the VL register, and try to reuse identical scalar registers
+    // to avoid new LVLr instructions as much as possible.
     unsigned Reg = getVL(*MI);
     if (Reg != VE::NoRegister) {
       LLVM_DEBUG(dbgs() << "Vector instruction found: ");
@@ -78,6 +84,8 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
                         << ". ");
 
       if (!HasRegForVL || RegForVL != Reg) {
+        // VL is used, but with a different value in a different scalar register.
+        // So, generate a new LVL instruction just before the current instruction.
         LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                           << RegName(Reg) << ".\n");
         BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
@@ -87,18 +95,15 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       } else {
         LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
       }
-    } else if (HasRegForVL) {
-      // Old VL is overwritten, so disable HasRegForVL.
-      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
-        LLVM_DEBUG(MI->dump());
-        HasRegForVL = false;
-      }
     }
+    // Check whether the scalar register holding the value for the VL register
+    // is updated.  Also note that a call doesn't preserve the VL register.
     if (HasRegForVL) {
-      // The latest VL is killed, so disable HasRegForVL.
-      if (MI->killsRegister(RegForVL, TRI)) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+      if (MI->definesRegister(RegForVL, TRI) ||
+          MI->modifiesRegister(RegForVL, TRI) ||
+          MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+        // The latest VL needs to be updated, so disable HasRegForVL.
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " needs to be updated: ");
         LLVM_DEBUG(MI->dump());
         HasRegForVL = false;
       }

diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
index ac889e7b60ca..c4db62442451 100644
--- a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -42,7 +42,6 @@ define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
 ; Check that no redundant 'lvl' is inserted when vector length does not change
 ; in a basic block.
 
-
 ; Function Attrs: nounwind
 define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
 ; CHECK-LABEL: stable_vl:
@@ -64,3 +63,43 @@ define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
   tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
   ret void
 }
+
+;;; Check the case where there is a call in the middle of the vector instructions.
+
+; Function Attrs: nounwind
+define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: call_invl:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s18, 288(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s19, 296(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s20, 304(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    or %s18, 0, %s1
+; CHECK-NEXT:    and %s20, %s0, (32)0
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 8, %s1
+; CHECK-NEXT:    or %s19, 0, %s2
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    lea %s0, fun@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fun@hi(, %s0)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 16, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    vld %v0, 8, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    ld %s20, 304(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s19, 296(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s18, 288(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  call void @fun()
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}
+
+declare void @fun()


        

