[llvm] [AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets (PR #71485)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 11:55:52 PST 2024
================
@@ -7566,6 +7566,100 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}
+// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
+static bool isEligibleToFoldADDIForLocalExecAccesses(SDNode *N,
+ SelectionDAG *DAG,
+ SDValue ADDIToFold) {
+ const PPCSubtarget &Subtarget =
+ DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+ // This optimization is only performed for non-TOC-based local-exec accesses.
+ if (!Subtarget.hasAIXSmallLocalExecTLS())
+ return false;
+
+ // Check if ADDIToFold (the ADDI that we want to fold into local-exec
+ // accesses), is truly an ADDI.
+ if (!ADDIToFold.isMachineOpcode() ||
+ (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
+ return false;
+
+ // The first operand of the ADDIToFold should be the thread pointer.
+ // This transformation is only performed if the first operand of the
+ // addi is the thread pointer.
+ SDValue TPRegNode = ADDIToFold.getOperand(0);
+ RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+ if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+ return false;
+
+ // The second operand of the ADDIToFold should be the global TLS address
+ // (the local-exec TLS variable). We only perform the folding if the TLS
+ // variable is the second operand.
+ SDValue TLSVarNode = ADDIToFold.getOperand(1);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+ if (!GA)
+ return false;
+
+ // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
+ // so this optimization is not performed if any other target flag value is set.
+ unsigned TargetFlags = GA->getTargetFlags();
+ if (TargetFlags != PPCII::MO_TPREL_FLAG)
+ return false;
+
+ // If all conditions are satisfied, the ADDI is valid for folding.
+ return true;
+}
+
+// For non-TOC-based local-exec access where an addi is feeding into another
+// addi, fold this sequence into a single addi if possible.
+// Before this optimization, the sequence appears as:
+// addi rN, r13, sym@le
+// addi rM, rN, imm
+// After this optimization, we can fold the two addi into a single one:
+// addi rM, r13, sym@le + imm
+static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+ if (N->getMachineOpcode() != PPC::ADDI8)
+ return;
+
+ // InitialADDI is the addi feeding into N (also an addi), and the addi that
+ // we want optimized out.
+ SDValue InitialADDI = N->getOperand(0);
+
+ if (!isEligibleToFoldADDIForLocalExecAccesses(N, DAG, InitialADDI))
+ return;
+
+ // At this point, InitialADDI can be folded into a non-TOC-based local-exec
+ // access. The first operand of InitialADDI should be the thread pointer,
+ // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+ SDValue TPRegNode = InitialADDI.getOperand(0);
+ RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+ const PPCSubtarget &Subtarget =
+ DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+ assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
+ "Expecting the first operand to be a thread pointer for folding addi "
+ "in local-exec accesses!");
+
+ // The second operand of the InitialADDI should be the global TLS address
+ // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
+ // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+ SDValue TLSVarNode = InitialADDI.getOperand(1);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+ assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
----------------
diggerlin wrote:
`isEligibleToFoldADDIForLocalExecAccesses` already returns false in this case, so do we still need the assert here?
https://github.com/llvm/llvm-project/pull/71485
More information about the llvm-commits
mailing list