[llvm] f4517bb - [VE] Implement JumpTable

Tue Nov 17 05:43:18 PST 2020

Author: Kazushi (Jam) Marukawa
Date: 2020-11-17T22:43:10+09:00
New Revision: f4517bbd73401160294d2a17b1d68e5807709710

URL: https://github.com/llvm/llvm-project/commit/f4517bbd73401160294d2a17b1d68e5807709710
DIFF: https://github.com/llvm/llvm-project/commit/f4517bbd73401160294d2a17b1d68e5807709710.diff

LOG: [VE] Implement JumpTable

Implement JumpTable to make BRIND work on VE.  Update an existing
br_jt regression test also.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D91582

Added: 
    

Modified: 
    llvm/lib/Target/VE/VEFrameLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td
    llvm/lib/Target/VE/VEMCInstLower.cpp
    llvm/test/CodeGen/VE/Scalar/br_jt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
index f27a2d08fd6c..ec18bee74329 100644

--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -92,8 +92,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
   //
   //    or %sp, 0, %fp
   //    ld %s17, 40(,%sp) iff this function is using s17 as BP
-  //    ld %got, 32(,%sp)
-  //    ld %plt, 24(,%sp)
+  //    ld %plt, 32(,%sp)
+  //    ld %got, 24(,%sp)
   //    ld %lr, 8(,%sp)
   //    ld %fp, 0(,%sp)
   BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11).addReg(VE::SX9).addImm(0);

diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index b319b6e6645b..864f097315a2 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -153,6 +154,7 @@ void VETargetLowering::initSPUActions() {
   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+  setOperationAction(ISD::JumpTable, PtrVT, Custom);
 
   /// VAARG handling {
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -173,9 +175,7 @@ void VETargetLowering::initSPUActions() {
   // VE doesn't have BRCOND
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 
-  // BRIND and BR_JT are not implemented yet.
-  // FIXME: Implement both for the scalar perforamnce.
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  // BR_JT is not implemented yet.
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
   /// } Branch
@@ -929,6 +929,9 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
     return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                        TF);
 
+  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
+    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
+
   llvm_unreachable("Unhandled address SDNode");
 }
 
@@ -957,7 +960,7 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
     MFI.setHasCalls(true);
     auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
 
-    if (isa<ConstantPoolSDNode>(Op) ||
+    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
         (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
       // Create following instructions for local linkage PIC code.
       //     lea %reg, label@gotoff_lo
@@ -1147,6 +1150,10 @@ SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
   return lowerToTLSGeneralDynamicModel(Op, DAG);
 }
 
+SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  return makeAddress(Op, DAG); // Delegates to the shared address lowering.
+}
+
 // Lower a f128 load into two f64 loads.
 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
   SDLoc DL(Op);
@@ -1412,6 +1419,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return lowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress:
     return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(Op, DAG);
   case ISD::LOAD:
     return lowerLOAD(Op, DAG);
   case ISD::STORE:
@@ -1424,6 +1433,63 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 }
 /// } Custom Lower
 
+/// JumpTable for VE.
+///
+///   VE cannot generate relocatable symbols in a jump table.  VE cannot
+///   generate expressions using symbols in both the text segment and the
+///   data segment like below.
+///             .4byte  .LBB0_2-.LJTI0_0
+///   So, we generate the offset from the top of the function like below as
+///   a custom label.
+///             .4byte  .LBB0_2-<function name>
+
+unsigned VETargetLowering::getJumpTableEncoding() const {
+  // Non-PIC code keeps the generic jump table encoding heuristics.
+  if (!isPositionIndependent())
+    return TargetLowering::getJumpTableEncoding();
+
+  // PIC code uses 32-bit custom entries (see LowerCustomJumpTableEntry).
+  return MachineJumpTableInfo::EK_Custom32;
+}
+
+const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
+    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
+    unsigned Uid, MCContext &Ctx) const {
+  assert(isPositionIndependent());
+  // Emit each PIC entry as the offset of MBB from its function's symbol:
+  //    .4byte  .LBB0_2-<function name>
+  const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
+  // Pass the StringRef itself; StringRef::data() need not be NUL-terminated.
+  MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName());
+  const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
+  return MCBinaryExpr::createSub(Value, Base, Ctx);
+}
+
+SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                   SelectionDAG &DAG) const {
+  assert(isPositionIndependent());
+  SDLoc DL(Table);
+  Function *Function = &DAG.getMachineFunction().getFunction();
+  assert(Function != nullptr);
+  auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
+
+  // In the jump table, we have the following values in PIC mode.
+  //    .4byte  .LBB0_2-<function name>
+  // We need to add this value and the address of this function to generate
+  // .LBB0_2 label correctly under PIC mode.  So, we want to generate following
+  // instructions:
+  //     lea %reg, fun@gotoff_lo
+  //     and %reg, %reg, (32)0
+  //     lea.sl %reg, fun@gotoff_hi(%reg, %got)
+  // In order to do so, we need to generate a correctly marked DAG node using
+  // makeHiLoPair.
+  SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
+  SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
+                              VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+  SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
+  return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
+}
+
 static bool isI32Insn(const SDNode *User, const SDNode *N) {
   switch (User->getOpcode()) {
   default:

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 4ac609b73830..050496b9133b 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -92,6 +92,15 @@ class VETargetLowering : public TargetLowering {
 
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  unsigned getJumpTableEncoding() const override;
+  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                          const MachineBasicBlock *MBB,
+                                          unsigned Uid,
+                                          MCContext &Ctx) const override;
+  SDValue getPICJumpTableRelocBase(SDValue Table,
+                                   SelectionDAG &DAG) const override;
+  // VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only
+  // EK_LabelDifference32.
 
   SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -99,6 +108,7 @@ class VETargetLowering : public TargetLowering {
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 1a15058cf6c4..0d5bbf282ddf 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1603,7 +1603,7 @@ def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
 def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
                              (LEASLrri $base, $lo, $hi)>;
 foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
-                 "tglobaltlsaddr" ] in {
+                 "tglobaltlsaddr", "tjumptable" ] in {
   def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
   def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
   def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;

diff  --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp
index c14121d9e18a..bc5577ce4f97 100644
--- a/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -63,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
     return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
   case MachineOperand::MO_Immediate:
     return MCOperand::createImm(MO.getImm());
-
+  case MachineOperand::MO_JumpTableIndex:
+    return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP);
   case MachineOperand::MO_MachineBasicBlock:
     return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP);
 

diff  --git a/llvm/test/CodeGen/VE/Scalar/br_jt.ll b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
index 86c089474c9d..a7218965c467 100644
--- a/llvm/test/CodeGen/VE/Scalar/br_jt.ll
+++ b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
@@ -1,36 +1,96 @@
 ; RUN: llc < %s -mtriple=ve | FileCheck %s
+; RUN: llc < %s -mtriple=ve -relocation-model=pic \
+; RUN:     | FileCheck %s -check-prefix=PIC
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @br_jt(i32 signext %0) {
 ; CHECK-LABEL: br_jt:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT:    brlt.w 2, %s0, .LBB{{[0-9]+}}_4
+; CHECK-NEXT:    adds.w.sx %s1, -1, %s0
+; CHECK-NEXT:    cmpu.w %s2, 3, %s1
+; CHECK-NEXT:    brgt.w 0, %s2, .LBB{{[0-9]+}}_5
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    breq.w 1, %s0, .LBB{{[0-9]+}}_8
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    brne.w 2, %s0, .LBB{{[0-9]+}}_7
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    sll %s0, %s0, 3
+; CHECK-NEXT:    lea %s1, .LJTI0_0@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, .LJTI0_0@hi(, %s1)
+; CHECK-NEXT:    ld %s1, (%s1, %s0)
+; CHECK-NEXT:    or %s0, 3, (0)1
+; CHECK-NEXT:    b.l.t (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_4:
-; CHECK-NEXT:    breq.w 3, %s0, .LBB{{[0-9]+}}_9
-; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    brne.w 4, %s0, .LBB{{[0-9]+}}_7
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    or %s0, 7, (0)1
-; CHECK-NEXT:  .LBB{{[0-9]+}}_7:
-; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_8:
-; CHECK-NEXT:    or %s0, 3, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:    or %s0, 4, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_9:
-; CHECK-NEXT:    or %s0, 4, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:    or %s0, 7, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt:
+; PIC:       # %bb.0:
+; PIC-NEXT:    st %s9, (, %s11)
+; PIC-NEXT:    st %s10, 8(, %s11)
+; PIC-NEXT:    st %s15, 24(, %s11)
+; PIC-NEXT:    st %s16, 32(, %s11)
+; PIC-NEXT:    or %s9, 0, %s11
+; PIC-NEXT:    lea %s13, -176
+; PIC-NEXT:    and %s13, %s13, (32)0
+; PIC-NEXT:    lea.sl %s11, -1(%s13, %s11)
+; PIC-NEXT:    brge.l %s11, %s8, .LBB0_7
+; PIC-NEXT:  # %bb.6:
+; PIC-NEXT:    ld %s61, 24(, %s14)
+; PIC-NEXT:    or %s62, 0, %s0
+; PIC-NEXT:    lea %s63, 315
+; PIC-NEXT:    shm.l %s63, (%s61)
+; PIC-NEXT:    shm.l %s8, 8(%s61)
+; PIC-NEXT:    shm.l %s11, 16(%s61)
+; PIC-NEXT:    monc
+; PIC-NEXT:    or %s0, 0, %s62
+; PIC-NEXT:  .LBB0_7:
+; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT:    adds.w.sx %s1, -1, %s0
+; PIC-NEXT:    cmpu.w %s2, 3, %s1
+; PIC-NEXT:    lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
+; PIC-NEXT:    and %s15, %s15, (32)0
+; PIC-NEXT:    sic %s16
+; PIC-NEXT:    lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; PIC-NEXT:    brgt.w 0, %s2, .LBB0_5
+; PIC-NEXT:  # %bb.1:
+; PIC-NEXT:    adds.w.zx %s0, %s1, (0)1
+; PIC-NEXT:    sll %s0, %s0, 2
+; PIC-NEXT:    lea %s1, .LJTI0_0@gotoff_lo
+; PIC-NEXT:    and %s1, %s1, (32)0
+; PIC-NEXT:    lea.sl %s1, .LJTI0_0@gotoff_hi(%s1, %s15)
+; PIC-NEXT:    ldl.sx %s0, (%s1, %s0)
+; PIC-NEXT:    lea %s1, br_jt@gotoff_lo
+; PIC-NEXT:    and %s1, %s1, (32)0
+; PIC-NEXT:    lea.sl %s1, br_jt@gotoff_hi(%s1, %s15)
+; PIC-NEXT:    adds.l %s1, %s0, %s1
+; PIC-NEXT:    or %s0, 3, (0)1
+; PIC-NEXT:    b.l.t (, %s1)
+; PIC-NEXT:  .LBB0_2:
+; PIC-NEXT:    or %s0, 0, (0)1
+; PIC-NEXT:    br.l.t .LBB0_5
+; PIC-NEXT:  .LBB0_3:
+; PIC-NEXT:    or %s0, 4, (0)1
+; PIC-NEXT:    br.l.t .LBB0_5
+; PIC-NEXT:  .LBB0_4:
+; PIC-NEXT:    or %s0, 7, (0)1
+; PIC-NEXT:  .LBB0_5:
+; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT:    or %s11, 0, %s9
+; PIC-NEXT:    ld %s16, 32(, %s11)
+; PIC-NEXT:    ld %s15, 24(, %s11)
+; PIC-NEXT:    ld %s10, 8(, %s11)
+; PIC-NEXT:    ld %s9, (, %s11)
+; PIC-NEXT:    b.l.t (, %s10)
   switch i32 %0, label %5 [
     i32 1, label %6
     i32 2, label %2