[llvm] [AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets (PR #71485)

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 12:10:26 PST 2024


================
@@ -7566,6 +7566,100 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
 }
 
+// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
+static bool isEligibleToFoldADDIForLocalExecAccesses(SDNode *N,
+                                                     SelectionDAG *DAG,
+                                                     SDValue ADDIToFold) {
+  const PPCSubtarget &Subtarget =
+      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+  // This optimization is only performed for non-TOC-based local-exec accesses.
+  if (!Subtarget.hasAIXSmallLocalExecTLS())
+    return false;
+
+  // Check if ADDIToFold (the ADDI that we want to fold into local-exec
+  // accesses), is truly an ADDI.
+  if (!ADDIToFold.isMachineOpcode() ||
+      (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
+    return false;
+
+  // The first operand of the ADDIToFold should be the thread pointer.
+  // This transformation is only performed if the first operand of the
+  // addi is the thread pointer.
+  SDValue TPRegNode = ADDIToFold.getOperand(0);
+  RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+  if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+    return false;
+
+  // The second operand of the ADDIToFold should be the global TLS address
+  // (the local-exec TLS variable). We only perform the folding if the TLS
+  // variable is the second operand.
+  SDValue TLSVarNode = ADDIToFold.getOperand(1);
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+  if (!GA)
+    return false;
+
+  // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
+  // so this optimization is not performed otherwise if the flag is not set.
+  unsigned TargetFlags = GA->getTargetFlags();
+  if (TargetFlags != PPCII::MO_TPREL_FLAG)
+    return false;
+
+  // If all conditions are satisfied, the ADDI is valid for folding.
+  return true;
+}
+
+// For non-TOC-based local-exec access where an addi is feeding into another
+// addi, fold this sequence into a single addi if possible.
+// Before this optimization, the sequence appears as:
+//    addi rN, r13, sym at le
+//    addi rM, rN, imm
+// After this optimization, we can fold the two addi into a single one:
+//    addi rM, r13, sym at le + imm
+static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+  if (N->getMachineOpcode() != PPC::ADDI8)
+    return;
+
+  // InitialADDI is the addi feeding into N (also an addi), and the addi that
+  // we want optimized out.
+  SDValue InitialADDI = N->getOperand(0);
+
+  if (!isEligibleToFoldADDIForLocalExecAccesses(N, DAG, InitialADDI))
+    return;
+
+  // At this point, InitialADDI can be folded into a non-TOC-based local-exec
+  // access. The first operand of InitialADDI should be the thread pointer,
+  // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+  SDValue TPRegNode = InitialADDI.getOperand(0);
+  RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+  const PPCSubtarget &Subtarget =
+      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+  assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
+         "Expecting the first operand to be a thread pointer for folding addi "
+         "in local-exec accesses!");
+
+  // The second operand of the InitialADDI should be the global TLS address
+  // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
+  // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+  SDValue TLSVarNode = InitialADDI.getOperand(1);
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+  assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
----------------
amy-kwan wrote:

Like the TPRegNode in  above in this function, if I remember correctly, this was also a previous suggestion, to add the assert here just in case the checks in `isEligibleToFoldADDIForLocalExecAccesses( )` changes in the future.

https://github.com/llvm/llvm-project/pull/71485


More information about the llvm-commits mailing list