[llvm] [AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets (PR #71485)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 15 10:54:40 PST 2024


================
@@ -7566,6 +7566,100 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
 }
 
+// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
+static bool isEligibleToFoldADDIForLocalExecAccesses(SDNode *N,
+                                                     SelectionDAG *DAG,
+                                                     SDValue ADDIToFold) {
+  const PPCSubtarget &Subtarget =
+      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+  // This optimization is only performed for non-TOC-based local-exec accesses.
+  if (!Subtarget.hasAIXSmallLocalExecTLS())
+    return false;
+
+  // Check if ADDIToFold (the ADDI that we want to fold into local-exec
+  // accesses), is truly an ADDI.
+  if (!ADDIToFold.isMachineOpcode() ||
+      (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
+    return false;
+
+  // The first operand of the ADDIToFold should be the thread pointer.
+  // This transformation is only performed if the first operand of the
+  // addi is the thread pointer.
+  SDValue TPRegNode = ADDIToFold.getOperand(0);
+  RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+  if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+    return false;
+
+  // The second operand of the ADDIToFold should be the global TLS address
+  // (the local-exec TLS variable). We only perform the folding if the TLS
+  // variable is the second operand.
+  SDValue TLSVarNode = ADDIToFold.getOperand(1);
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+  if (!GA)
+    return false;
+
+  // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
+  // so this optimization is not performed otherwise if the flag is not set.
+  unsigned TargetFlags = GA->getTargetFlags();
+  if (TargetFlags != PPCII::MO_TPREL_FLAG)
+    return false;
+
+  // If all conditions are satisfied, the ADDI is valid for folding.
+  return true;
+}
+
+// For non-TOC-based local-exec access where an addi is feeding into another
+// addi, fold this sequence into a single addi if possible.
+// Before this optimization, the sequence appears as:
+//    addi rN, r13, sym at le
+//    addi rM, rN, imm
+// After this optimization, we can fold the two addi into a single one:
+//    addi rM, r13, sym at le + imm
+static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+  if (N->getMachineOpcode() != PPC::ADDI8)
----------------
diggerlin wrote:

since most scenario are `N->getMachineOpcode() != PPC::ADDI8`

suggest that put the check outside of the function `foldADDIForLocalExecAccesses`

for example 
```
if (N->getMachineOpcode() == PPC::ADDI8)
    foldADDIForLocalExecAccesses(N, CurDAG);
```
to avoid most of the function call `foldADDIForLocalExecAccesses` overhead.


https://github.com/llvm/llvm-project/pull/71485


More information about the llvm-commits mailing list