[llvm] [llvm][ScheduleDAG] Re-arrange SUnit's members to make it smaller (PR #94547)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 5 17:04:55 PDT 2024


https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/94547

None

>From 59b318d6297474a5c94bea162d4f1eb95b4fd5cf Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 5 Jun 2024 12:08:38 -0700
Subject: [PATCH 1/2] [llvm][ScheduleDAG] Set a fixed size for
 Sched::Preference

This trims off 8 bytes from llvm::SUnit:

--- before	2024-06-05 12:13:00
+++ after	2024-06-05 12:12:58
@@ -1,65 +1,65 @@
 *** Dumping AST Record Layout
          0 | class llvm::SUnit
          0 |   SDNode * Node
          8 |   MachineInstr * Instr
         16 |   SUnit * OrigNode
         24 |   const MCSchedClassDesc * SchedClass
         32 |   class llvm::SmallVector<class llvm::SDep, 4> Preds
         32 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
         32 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
         32 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
         32 |           class llvm::SmallVectorBase<uint32_t> (base)
         32 |             void * BeginX
         40 |             unsigned int Size
         44 |             unsigned int Capacity
         48 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
         48 |       char[64] InlineElts
        112 |   class llvm::SmallVector<class llvm::SDep, 4> Succs
        112 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
        112 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
        112 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
        112 |           class llvm::SmallVectorBase<uint32_t> (base)
        112 |             void * BeginX
        120 |             unsigned int Size
        124 |             unsigned int Capacity
        128 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
        128 |       char[64] InlineElts
        192 |   unsigned int NodeNum
        196 |   unsigned int NodeQueueId
        200 |   unsigned int NumPreds
        204 |   unsigned int NumSuccs
        208 |   unsigned int NumPredsLeft
        212 |   unsigned int NumSuccsLeft
        216 |   unsigned int WeakPredsLeft
        220 |   unsigned int WeakSuccsLeft
        224 |   unsigned short NumRegDefsLeft
        226 |   unsigned short Latency
    228:0-0 |   _Bool isVRegCycle
    228:1-1 |   _Bool isCall
    228:2-2 |   _Bool isCallOp
    228:3-3 |   _Bool isTwoAddress
    228:4-4 |   _Bool isCommutable
    228:5-5 |   _Bool hasPhysRegUses
    228:6-6 |   _Bool hasPhysRegDefs
    228:7-7 |   _Bool hasPhysRegClobbers
    229:0-0 |   _Bool isPending
    229:1-1 |   _Bool isAvailable
    229:2-2 |   _Bool isScheduled
    229:3-3 |   _Bool isScheduleHigh
    229:4-4 |   _Bool isScheduleLow
    229:5-5 |   _Bool isCloned
    229:6-6 |   _Bool isUnbuffered
    229:7-7 |   _Bool hasReservedResource
-       232 |   Sched::Preference SchedulingPref
-   236:0-0 |   _Bool isDepthCurrent
-   236:1-1 |   _Bool isHeightCurrent
-       240 |   unsigned int Depth
-       244 |   unsigned int Height
-       248 |   unsigned int TopReadyCycle
-       252 |   unsigned int BotReadyCycle
-       256 |   const TargetRegisterClass * CopyDstRC
-       264 |   const TargetRegisterClass * CopySrcRC
-           | [sizeof=272, dsize=272, align=8,
-           |  nvsize=272, nvalign=8]
+       230 |   Sched::Preference SchedulingPref
+   231:0-0 |   _Bool isDepthCurrent
+   231:1-1 |   _Bool isHeightCurrent
+       232 |   unsigned int Depth
+       236 |   unsigned int Height
+       240 |   unsigned int TopReadyCycle
+       244 |   unsigned int BotReadyCycle
+       248 |   const TargetRegisterClass * CopyDstRC
+       256 |   const TargetRegisterClass * CopySrcRC
+           | [sizeof=264, dsize=264, align=8,
+           |  nvsize=264, nvalign=8]

-
+**>
---
 llvm/include/llvm/CodeGen/TargetLowering.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d1912b1c4c0f6..aa7a32e86ad83 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -96,7 +96,7 @@ class Value;
 
 namespace Sched {
 
-enum Preference {
+enum Preference : uint8_t {
   None,        // No preference
   Source,      // Follow source order.
   RegPressure, // Scheduling for lowest register pressure.

>From 25196f6b9afbcd6ee8a56cee29f6d8271faaf3b7 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 5 Jun 2024 17:00:07 -0700
Subject: [PATCH 2/2] [llvm][ScheduleDAG] Re-arrange SUnit's members to make it
 smaller

before:
```
*** Dumping AST Record Layout
         0 | class llvm::SUnit
         0 |   SDNode * Node
         8 |   MachineInstr * Instr
        16 |   SUnit * OrigNode
        24 |   const MCSchedClassDesc * SchedClass
        32 |   class llvm::SmallVector<class llvm::SDep, 4> Preds
        32 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
        32 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
        32 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
        32 |           class llvm::SmallVectorBase<uint32_t> (base)
        32 |             void * BeginX
        40 |             unsigned int Size
        44 |             unsigned int Capacity
        48 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
        48 |       char[64] InlineElts
       112 |   class llvm::SmallVector<class llvm::SDep, 4> Succs
       112 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
       112 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
       112 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
       112 |           class llvm::SmallVectorBase<uint32_t> (base)
       112 |             void * BeginX
       120 |             unsigned int Size
       124 |             unsigned int Capacity
       128 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
       128 |       char[64] InlineElts
       192 |   unsigned int NodeNum
       196 |   unsigned int NodeQueueId
       200 |   unsigned int NumPreds
       204 |   unsigned int NumSuccs
       208 |   unsigned int NumPredsLeft
       212 |   unsigned int NumSuccsLeft
       216 |   unsigned int WeakPredsLeft
       220 |   unsigned int WeakSuccsLeft
       224 |   unsigned short NumRegDefsLeft
       226 |   unsigned short Latency
   228:0-0 |   _Bool isVRegCycle
   228:1-1 |   _Bool isCall
   228:2-2 |   _Bool isCallOp
   228:3-3 |   _Bool isTwoAddress
   228:4-4 |   _Bool isCommutable
   228:5-5 |   _Bool hasPhysRegUses
   228:6-6 |   _Bool hasPhysRegDefs
   228:7-7 |   _Bool hasPhysRegClobbers
   229:0-0 |   _Bool isPending
   229:1-1 |   _Bool isAvailable
   229:2-2 |   _Bool isScheduled
   229:3-3 |   _Bool isScheduleHigh
   229:4-4 |   _Bool isScheduleLow
   229:5-5 |   _Bool isCloned
   229:6-6 |   _Bool isUnbuffered
   229:7-7 |   _Bool hasReservedResource
       232 |   Sched::Preference SchedulingPref
   236:0-0 |   _Bool isDepthCurrent
   236:1-1 |   _Bool isHeightCurrent
       240 |   unsigned int Depth
       244 |   unsigned int Height
       248 |   unsigned int TopReadyCycle
       252 |   unsigned int BotReadyCycle
       256 |   const TargetRegisterClass * CopyDstRC
       264 |   const TargetRegisterClass * CopySrcRC
           | [sizeof=272, dsize=272, align=8,
           |  nvsize=272, nvalign=8]
```

after:
```
*** Dumping AST Record Layout
         0 | class llvm::SUnit
         0 |   union llvm::SUnit::(anonymous at /Users/jonathan_roelofs/llvm-upstream/llvm/include/llvm/CodeGen/ScheduleDAG.h:246:5)
         0 |     SDNode * Node
         0 |     MachineInstr * Instr
         8 |   SUnit * OrigNode
        16 |   const MCSchedClassDesc * SchedClass
        24 |   const TargetRegisterClass * CopyDstRC
        32 |   const TargetRegisterClass * CopySrcRC
        40 |   class llvm::SmallVector<class llvm::SDep, 4> Preds
        40 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
        40 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
        40 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
        40 |           class llvm::SmallVectorBase<uint32_t> (base)
        40 |             void * BeginX
        48 |             unsigned int Size
        52 |             unsigned int Capacity
        56 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
        56 |       char[64] InlineElts
       120 |   class llvm::SmallVector<class llvm::SDep, 4> Succs
       120 |     class llvm::SmallVectorImpl<class llvm::SDep> (base)
       120 |       class llvm::SmallVectorTemplateBase<class llvm::SDep> (base)
       120 |         class llvm::SmallVectorTemplateCommon<class llvm::SDep> (base)
       120 |           class llvm::SmallVectorBase<uint32_t> (base)
       120 |             void * BeginX
       128 |             unsigned int Size
       132 |             unsigned int Capacity
       136 |     struct llvm::SmallVectorStorage<class llvm::SDep, 4> (base)
       136 |       char[64] InlineElts
       200 |   unsigned int NodeNum
       204 |   unsigned int NodeQueueId
       208 |   unsigned int NumPreds
       212 |   unsigned int NumSuccs
       216 |   unsigned int NumPredsLeft
       220 |   unsigned int NumSuccsLeft
       224 |   unsigned int WeakPredsLeft
       228 |   unsigned int WeakSuccsLeft
       232 |   unsigned int TopReadyCycle
       236 |   unsigned int BotReadyCycle
       240 |   unsigned int Depth
       244 |   unsigned int Height
   248:0-0 |   _Bool isVRegCycle
   248:1-1 |   _Bool isCall
   248:2-2 |   _Bool isCallOp
   248:3-3 |   _Bool isTwoAddress
   248:4-4 |   _Bool isCommutable
   248:5-5 |   _Bool hasPhysRegUses
   248:6-6 |   _Bool hasPhysRegDefs
   248:7-7 |   _Bool hasPhysRegClobbers
   249:0-0 |   _Bool isPending
   249:1-1 |   _Bool isAvailable
   249:2-2 |   _Bool isScheduled
   249:3-3 |   _Bool isScheduleHigh
   249:4-4 |   _Bool isScheduleLow
   249:5-5 |   _Bool isCloned
   249:6-6 |   _Bool isUnbuffered
   249:7-7 |   _Bool hasReservedResource
       250 |   unsigned short NumRegDefsLeft
       252 |   unsigned short Latency
   254:0-0 |   _Bool isDepthCurrent
   254:1-1 |   _Bool isHeightCurrent
   254:2-2 |   _Bool isNode
   254:3-3 |   _Bool isInst
   254:4-7 |   Sched::Preference SchedulingPref
           | [sizeof=256, dsize=255, align=8,
           |  nvsize=255, nvalign=8]
```
---
 llvm/include/llvm/CodeGen/ScheduleDAG.h    | 92 +++++++++++++---------
 llvm/include/llvm/CodeGen/TargetLowering.h |  3 +-
 2 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index c5172e8c542b7..f9951a8d52479 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -243,8 +243,10 @@ class TargetRegisterInfo;
   private:
     enum : unsigned { BoundaryID = ~0u };
 
-    SDNode *Node = nullptr;        ///< Representative node.
-    MachineInstr *Instr = nullptr; ///< Alternatively, a MachineInstr.
+    union {
+      SDNode *Node;        ///< Representative node.
+      MachineInstr *Instr; ///< Alternatively, a MachineInstr.
+    };
 
   public:
     SUnit *OrigNode = nullptr; ///< If not this, the node from which this node
@@ -253,6 +255,10 @@ class TargetRegisterInfo;
     const MCSchedClassDesc *SchedClass =
         nullptr; ///< nullptr or resolved SchedClass.
 
+    const TargetRegisterClass *CopyDstRC =
+        nullptr; ///< Is a special copy node if != nullptr.
+    const TargetRegisterClass *CopySrcRC = nullptr;
+
     SmallVector<SDep, 4> Preds;  ///< All sunit predecessors.
     SmallVector<SDep, 4> Succs;  ///< All sunit successors.
 
@@ -269,8 +275,14 @@ class TargetRegisterInfo;
     unsigned NumSuccsLeft = 0;         ///< # of succs not scheduled.
     unsigned WeakPredsLeft = 0;        ///< # of weak preds not scheduled.
     unsigned WeakSuccsLeft = 0;        ///< # of weak succs not scheduled.
-    unsigned short NumRegDefsLeft = 0; ///< # of reg defs with no scheduled use.
-    unsigned short Latency = 0;        ///< Node latency.
+    unsigned TopReadyCycle = 0; ///< Cycle relative to start when node is ready.
+    unsigned BotReadyCycle = 0; ///< Cycle relative to end when node is ready.
+
+  private:
+    unsigned Depth = 0;  ///< Node depth.
+    unsigned Height = 0; ///< Node height.
+
+  public:
     bool isVRegCycle      : 1;         ///< May use and def the same vreg.
     bool isCall           : 1;         ///< Is a function call.
     bool isCallOp         : 1;         ///< Is a function call operand.
@@ -287,52 +299,54 @@ class TargetRegisterInfo;
     bool isCloned         : 1;         ///< True if this node has been cloned.
     bool isUnbuffered     : 1;         ///< Uses an unbuffered resource.
     bool hasReservedResource : 1;      ///< Uses a reserved resource.
-    Sched::Preference SchedulingPref = Sched::None; ///< Scheduling preference.
+    unsigned short NumRegDefsLeft = 0; ///< # of reg defs with no scheduled use.
+    unsigned short Latency = 0;        ///< Node latency.
 
   private:
     bool isDepthCurrent   : 1;         ///< True if Depth is current.
     bool isHeightCurrent  : 1;         ///< True if Height is current.
-    unsigned Depth = 0;                ///< Node depth.
-    unsigned Height = 0;               ///< Node height.
+    bool isNode : 1; ///< True if the representative is an SDNode
+    bool isInst : 1; ///< True if the representative is a MachineInstr
 
   public:
-    unsigned TopReadyCycle = 0; ///< Cycle relative to start when node is ready.
-    unsigned BotReadyCycle = 0; ///< Cycle relative to end when node is ready.
-
-    const TargetRegisterClass *CopyDstRC =
-        nullptr; ///< Is a special copy node if != nullptr.
-    const TargetRegisterClass *CopySrcRC = nullptr;
+    Sched::Preference SchedulingPref : 4; ///< Scheduling preference.
+    static_assert(Sched::Preference::Last < (1 << 4),
+                  "not enough bits in bitfield");
 
     /// Constructs an SUnit for pre-regalloc scheduling to represent an
     /// SDNode and any nodes flagged to it.
     SUnit(SDNode *node, unsigned nodenum)
-      : Node(node), NodeNum(nodenum), isVRegCycle(false), isCall(false),
-        isCallOp(false), isTwoAddress(false), isCommutable(false),
-        hasPhysRegUses(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
-        isPending(false), isAvailable(false), isScheduled(false),
-        isScheduleHigh(false), isScheduleLow(false), isCloned(false),
-        isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false),
-        isHeightCurrent(false) {}
+        : Node(node), NodeNum(nodenum), isVRegCycle(false), isCall(false),
+          isCallOp(false), isTwoAddress(false), isCommutable(false),
+          hasPhysRegUses(false), hasPhysRegDefs(false),
+          hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+          isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+          isCloned(false), isUnbuffered(false), hasReservedResource(false),
+          isDepthCurrent(false), isHeightCurrent(false), isNode(true),
+          isInst(false), SchedulingPref(Sched::None) {}
 
     /// Constructs an SUnit for post-regalloc scheduling to represent a
     /// MachineInstr.
     SUnit(MachineInstr *instr, unsigned nodenum)
-      : Instr(instr), NodeNum(nodenum), isVRegCycle(false), isCall(false),
-        isCallOp(false), isTwoAddress(false), isCommutable(false),
-        hasPhysRegUses(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
-        isPending(false), isAvailable(false), isScheduled(false),
-        isScheduleHigh(false), isScheduleLow(false), isCloned(false),
-        isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false),
-        isHeightCurrent(false) {}
+        : Instr(instr), NodeNum(nodenum), isVRegCycle(false), isCall(false),
+          isCallOp(false), isTwoAddress(false), isCommutable(false),
+          hasPhysRegUses(false), hasPhysRegDefs(false),
+          hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+          isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+          isCloned(false), isUnbuffered(false), hasReservedResource(false),
+          isDepthCurrent(false), isHeightCurrent(false), isNode(false),
+          isInst(true), SchedulingPref(Sched::None) {}
 
     /// Constructs a placeholder SUnit.
     SUnit()
-      : isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
-        isCommutable(false), hasPhysRegUses(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), isUnbuffered(false), hasReservedResource(false),
-        isDepthCurrent(false), isHeightCurrent(false) {}
+        : Node(nullptr), isVRegCycle(false), isCall(false), isCallOp(false),
+          isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+          hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+          isAvailable(false), isScheduled(false), isScheduleHigh(false),
+          isScheduleLow(false), isCloned(false), isUnbuffered(false),
+          hasReservedResource(false), isDepthCurrent(false),
+          isHeightCurrent(false), isNode(false), isInst(false),
+          SchedulingPref(Sched::None) {}
 
     /// Boundary nodes are placeholders for the boundary of the
     /// scheduling region.
@@ -346,32 +360,34 @@ class TargetRegisterInfo;
     /// Assigns the representative SDNode for this SUnit. This may be used
     /// during pre-regalloc scheduling.
     void setNode(SDNode *N) {
-      assert(!Instr && "Setting SDNode of SUnit with MachineInstr!");
+      assert(!isInst && "Setting SDNode of SUnit with MachineInstr!");
       Node = N;
+      isNode = true;
     }
 
     /// Returns the representative SDNode for this SUnit. This may be used
     /// during pre-regalloc scheduling.
     SDNode *getNode() const {
-      assert(!Instr && "Reading SDNode of SUnit with MachineInstr!");
+      assert(!isInst && "Reading SDNode of SUnit with MachineInstr!");
       return Node;
     }
 
     /// Returns true if this SUnit refers to a machine instruction as
     /// opposed to an SDNode.
-    bool isInstr() const { return Instr; }
+    bool isInstr() const { return isInst && Instr; }
 
     /// Assigns the instruction for the SUnit. This may be used during
     /// post-regalloc scheduling.
     void setInstr(MachineInstr *MI) {
-      assert(!Node && "Setting MachineInstr of SUnit with SDNode!");
+      assert(!isNode && "Setting MachineInstr of SUnit with SDNode!");
       Instr = MI;
+      isInst = true;
     }
 
     /// Returns the representative MachineInstr for this SUnit. This may be used
     /// during post-regalloc scheduling.
     MachineInstr *getInstr() const {
-      assert(!Node && "Reading MachineInstr of SUnit with SDNode!");
+      assert(!isNode && "Reading MachineInstr of SUnit with SDNode!");
       return Instr;
     }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index aa7a32e86ad83..fc8de4a649980 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -104,7 +104,8 @@ enum Preference : uint8_t {
   ILP,         // Scheduling for ILP in low register pressure mode.
   VLIW,        // Scheduling for VLIW targets.
   Fast,        // Fast suboptimal list scheduling
-  Linearize    // Linearize DAG, no scheduling
+  Linearize,   // Linearize DAG, no scheduling
+  Last = Linearize  // Marker for the last Sched::Preference
 };
 
 } // end namespace Sched



More information about the llvm-commits mailing list