[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation

robert muth robert at muth.org
Tue Jun 2 18:26:31 PDT 2009


Hi:

This is my first patch submission. Hopefully, this is the proper protocol.
Attached is a patch for the llc ARM backend:

Added a mechanism to generate switch tables in a data section
rather than having them interleaved with the code.
This is controlled by command-line flags (-outline-jumptables, -jumptable-section) and is off by default.
I also tried to document and improve the code where I modified it.

Robert
-------------- next part --------------
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h	(revision 72728)
+++ lib/Target/ARM/ARMISelLowering.h	(working copy)
@@ -147,9 +147,10 @@
     /// make the right decision when generating code for different targets.
     const ARMSubtarget *Subtarget;
 
-    /// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
-    ///
+    /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     unsigned ARMPCLabelIndex;
+    /// ARMJumpTableIndex - Keep track of the number of jump tables created.
+    unsigned ARMJumpTableIndex;
 
     SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                              const SDValue &StackPtr, const CCValAssign &VA,
Index: lib/Target/ARM/ARMConstantPoolValue.h
===================================================================
--- lib/Target/ARM/ARMConstantPoolValue.h	(revision 72728)
+++ lib/Target/ARM/ARMConstantPoolValue.h	(working copy)
@@ -25,6 +25,7 @@
   enum ARMCPKind {
     CPValue,
     CPNonLazyPtr,
+    CPDataSegmentJumpTable,
     CPStub
   };
 }
@@ -54,7 +55,6 @@
   ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
                        const char *Modifier);
 
-
   GlobalValue *getGV() const { return GV; }
   const char *getSymbol() const { return S; }
   const char *getModifier() const { return Modifier; }
@@ -63,6 +63,9 @@
   unsigned getLabelId() const { return LabelId; }
   bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
   bool isStub() const { return Kind == ARMCP::CPStub; }
+  bool isValue() const { return Kind == ARMCP::CPValue; }
+  bool isDataSegmentJumpTable() const {
+    return Kind == ARMCP::CPDataSegmentJumpTable; }
   unsigned char getPCAdjustment() const { return PCAdjust; }
 
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
Index: lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
===================================================================
--- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp	(revision 72728)
+++ lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp	(working copy)
@@ -33,13 +33,19 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cctype>
+#include <sstream>
 using namespace llvm;
 
+extern cl::opt<bool> FlagOutlineJumpTables;
+cl::opt<std::string> FlagJumpTableSection("jumptable-section",
+                                           cl::init(".data.jtab"));
+
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 namespace {
@@ -139,6 +145,7 @@
     /// EmitMachineConstantPoolValue - Print a machine constantpool value to
     /// the .s file.
     virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+      // NOTE: A lot of this code is replicated in  ARMConstantPoolValue::print
       printDataDirective(MCPV->getType());
 
       ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -155,8 +162,13 @@
       } else if (ACPV->isStub()) {
         FnStubs.insert(Name);
         printSuffixedName(Name, "$stub");
-      } else
+      } else if (ACPV->isDataSegmentJumpTable()) {
+        // requires synchronization with code (grep for "$jumptable$")
+        O << Name << "$jumptable$" << ACPV->getLabelId();
+      } else {
+        assert(ACPV->isValue() && "unknown CP kind");
         O << Name;
+      }
       if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
       if (ACPV->getPCAdjustment() != 0) {
         O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
@@ -695,12 +707,31 @@
   }
 }
 
+
 void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
-  const MachineOperand &MO1 = MI->getOperand(OpNo);
-  const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
-  unsigned JTI = MO1.getIndex();
-  O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-    << '_' << JTI << '_' << MO2.getImm() << ":\n";
+  std::stringstream  prefix;
+
+  const unsigned JTI =  MI->getOperand(OpNo).getIndex();
+  const unsigned uid = MI->getOperand(OpNo+1).getImm();
+
+  if (FlagOutlineJumpTables) {
+    // needs to be synchronized with ARMConstantPoolValue.cpp
+    prefix << ".T$jumptable$" << uid;
+  } else {
+    // needs to be synchronized with other places in this file
+    prefix  << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+            << '_' << JTI << '_' << uid;
+  }
+
+  if (FlagOutlineJumpTables) {
+    // switch out of text section
+    O << ".section  " <<  FlagJumpTableSection << ",\"aMS\",%progbits,1\n";
+    O << ".align 4\n";
+    O << "\n\n";
+  }
+
+  // the table label
+  O << prefix.str() << ":\n";
 
   const char *JTEntryDirective = TAI->getJumpTableDirective();
   if (!JTEntryDirective)
@@ -715,24 +746,28 @@
   for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
     MachineBasicBlock *MBB = JTBBs[i];
     if (UseSet && JTSets.insert(MBB).second)
-      printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
+      printPICJumpTableSetLabel(JTI, uid, MBB);
 
     O << JTEntryDirective << ' ';
     if (UseSet)
-      O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
-        << '_' << JTI << '_' << MO2.getImm()
-        << "_set_" << MBB->getNumber();
+      O << prefix.str() << "_set_" << MBB->getNumber();
     else if (TM.getRelocationModel() == Reloc::PIC_) {
       printBasicBlockLabel(MBB, false, false, false);
       // If the arch uses custom Jump Table directives, don't calc relative to JT
       if (!TAI->getJumpTableDirective()) 
-        O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
-          << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
+        O << '-' << prefix.str();
     } else
       printBasicBlockLabel(MBB, false, false, false);
     if (i != e-1)
       O << '\n';
   }
+
+  if (FlagOutlineJumpTables) {
+    // switch back into the text section
+    O << "\n\n";
+    O << ".text\n";
+    O << "\n\n";
+  }
 }
 
 
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp	(revision 72728)
+++ lib/Target/ARM/ARMISelLowering.cpp	(working copy)
@@ -36,9 +36,16 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MathExtras.h"
+
 using namespace llvm;
 
+
+
+cl::opt<bool> FlagOutlineJumpTables("outline-jumptables",
+                                    cl::desc("move jumptables from text to data"));
+
 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
@@ -1390,27 +1397,119 @@
   return Res;
 }
 
-SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
+
+
+// Similar to LowerBR_JT_Inline except that the jump table
+// is moved to the data segment.
+// This causes an extra load to access the table but keeps the
+// text segment small and avoids some issues with generating PIC.
+// Needs to be activated with a command-line flag.
+static SDValue LowerBR_JT_Outline(SDValue Op,
+                                  SelectionDAG &DAG,
+                                  unsigned Num,
+                                  MVT PTy) {
+
   SDValue Chain = Op.getOperand(0);
-  SDValue Table = Op.getOperand(1);
-  SDValue Index = Op.getOperand(2);
-  DebugLoc dl = Op.getDebugLoc();
 
-  MVT PTy = getPointerTy();
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+  const SDValue Index = Op.getOperand(2);
+
+
+  const DebugLoc dl = Op.getDebugLoc();
+
+  const SDValue UId =  DAG.getConstant(Num, PTy);
+  const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+
+  // create a new global symbol for the jumptable
+  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(".T", Num,
+                                          ARMCP::CPDataSegmentJumpTable);
+  const SDValue CPAddr = DAG.getTargetConstantPool(CPV, PTy, 4);
+
+  // An ARM idiosyncrasy: wrap each constant pool entry before accessing it
+  const SDValue Wrapper = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+  // Load table start from constant pool
+  const SDValue Table = DAG.getLoad(PTy, dl, DAG.getEntryNode(), Wrapper, NULL, 0);
+
+  // table entries are 4 bytes, so multiply index by 4
+  const SDValue ScaledIndex = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+
+  // add scaled index to table beginning
+  const SDValue TabEntryAddr = DAG.getNode(ISD::ADD, dl, PTy, ScaledIndex, Table);
+  const SDValue JumpTarget = DAG.getLoad(PTy, dl, Chain, TabEntryAddr, NULL, 0);
+
+  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, JumpTarget, JTI, UId);
+}
+
+
+static SDValue LowerBR_JT_Inline(SDValue Op,
+                                 SelectionDAG &DAG,
+                                 bool isPIC,
+                                 MVT PTy) {
+  //  The jump table idiom we are aiming for looks something like this:
+  //
+  //          .set PCRELV0, (.LJTI9_0_0-(.LPCRELL0+8))
+  //  .LPCRELL0:
+  //          add r3, pc, #PCRELV0
+  //          ldr pc, [r3, +r0, lsl #2]
+  //  .LJTI9_0_0:
+  //          .long    .LBB9_2
+  //          .long    .LBB9_5
+  //          .long    .LBB9_7
+  //          .long    .LBB9_4
+  //          .long    .LBB9_8
+  //
+  // In PIC mode the table entries are relative to the table beginning,
+  // requiring an extra addition
+  //
+  // The code generation logic for ARMISD::BR_JT will also
+  // emit the table (c.f. ARMAsmPrinter::printJTBlockOperand())
+  // Also check  "def BR_JTm" in ARMInstrInfo.td
+
+  // allocate constant pool entry
   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
-  SDValue UId =  DAG.getConstant(AFI->createJumpTableUId(), PTy);
-  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
-  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
-  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
-  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
-  bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
-                     Chain, Addr, NULL, 0);
-  Chain = Addr.getValue(1);
-  if (isPIC)
-    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
-  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+
+  SDValue Chain = Op.getOperand(0);
+
+  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+  const SDValue Index = Op.getOperand(2);
+  const DebugLoc dl = Op.getDebugLoc();
+
+
+  const SDValue UId =  DAG.getConstant(AFI->createJumpTableUId(), PTy);
+
+  const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+
+  // this uses pcrel magic to materialize the table start address
+  const SDValue Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+
+  // table entries are 4 bytes, so multiply index by 4
+   const SDValue ScaledIndex = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+
+  // add scaled index to table beginning
+  const SDValue TabEntryAddr = DAG.getNode(ISD::ADD, dl, PTy, ScaledIndex, Table);
+  SDValue JumpTarget;
+  if (isPIC) {
+     JumpTarget = DAG.getLoad((MVT)MVT::i32, dl, Chain, TabEntryAddr, NULL, 0);
+     Chain = JumpTarget.getValue(1);
+     // the table entry is not the actual target but relative to other stuff
+     // TODO: explain the exact magic here
+     JumpTarget = DAG.getNode(ISD::ADD, dl, PTy, JumpTarget, Table);
+  } else {
+    JumpTarget = DAG.getLoad(PTy, dl, Chain, TabEntryAddr, NULL, 0);
+    Chain = JumpTarget.getValue(1);
+  }
+
+  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, JumpTarget, JTI, UId);
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
+  if (FlagOutlineJumpTables) {
+    return LowerBR_JT_Outline(Op, DAG, ++ARMJumpTableIndex, getPointerTy());
+  } else {
+    const bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+    return LowerBR_JT_Inline(Op, DAG, isPIC, getPointerTy());
+  }
 }
 
 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
Index: lib/Target/ARM/ARMConstantPoolValue.cpp
===================================================================
--- lib/Target/ARM/ARMConstantPoolValue.cpp	(revision 72728)
+++ lib/Target/ARM/ARMConstantPoolValue.cpp	(working copy)
@@ -85,12 +85,26 @@
 }
 
 void ARMConstantPoolValue::print(raw_ostream &O) const {
+  // NOTE: this is not used for codegeneration, moreover
+  //       some of the logic is replicated in
+  //       EmitMachineConstantPoolValue()
+
   if (GV)
     O << GV->getName();
   else
     O << S;
-  if (isNonLazyPointer()) O << "$non_lazy_ptr";
-  else if (isStub()) O << "$stub";
+
+  if (isNonLazyPointer()) {
+    O << "$non_lazy_ptr";
+  } else if (isStub()) {
+    O << "$stub";
+  } else if (isDataSegmentJumpTable()) {
+    // requires synchronization with ARMAsmPrinter.cpp
+    O << "$jumptable$" << LabelId;
+  } else {
+    assert(isValue() && "unknown CP kind");
+  }
+
   if (Modifier) O << "(" << Modifier << ")";
   if (PCAdjust != 0) {
     O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;


More information about the llvm-dev mailing list