[llvm] [AArch64][PAC] Select auth+load into LDRAA/LDRAB/LDRA[pre]. (PR #123769)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 08:56:06 PST 2025
================
@@ -1671,6 +1673,163 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::tryAuthLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i64)
+ return false;
+
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "invalid 64bit extload");
+
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::PRE_INC && AM != ISD::UNINDEXED)
+ return false;
+ bool IsPre = AM == ISD::PRE_INC;
+
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ SDValue Base = Ptr;
+
+ int64_t OffsetVal = 0;
+ if (IsPre) {
+ OffsetVal = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
+ } else if (CurDAG->isBaseWithConstantOffset(Base)) {
+ // We support both 'base' and 'base + constant offset' modes.
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Base.getOperand(1));
+ if (!RHS)
+ return false;
+ OffsetVal = RHS->getSExtValue();
+ Base = Base.getOperand(0);
+ }
+
+ // The base must be of the form:
+ // (int_ptrauth_auth <signedbase>, <key>, <disc>)
+ // with disc being either a constant int, or:
+ // (int_ptrauth_blend <addrdisc>, <const int disc>)
+ if (Base.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return false;
+
+ unsigned IntID = cast<ConstantSDNode>(Base.getOperand(0))->getZExtValue();
+ if (IntID != Intrinsic::ptrauth_auth)
+ return false;
+
+ unsigned KeyC = cast<ConstantSDNode>(Base.getOperand(2))->getZExtValue();
+ bool IsDKey = KeyC == AArch64PACKey::DA || KeyC == AArch64PACKey::DB;
+ SDValue Disc = Base.getOperand(3);
+
+ Base = Base.getOperand(1);
+
+ bool ZeroDisc = isNullConstant(Disc);
+ SDValue IntDisc, AddrDisc;
+ std::tie(IntDisc, AddrDisc) = extractPtrauthBlendDiscriminators(Disc, CurDAG);
+
+ // If this is an indexed pre-inc load, we obviously need the writeback form.
+ bool needsWriteback = IsPre;
+ // If not, but the base authenticated pointer has any other use, it's
+ // beneficial to use the writeback form, to "writeback" the auth, even if
+ // there is no base+offset addition.
+ if (!Ptr.hasOneUse()) {
+ needsWriteback = true;
+
+ // However, we can only do that if we don't introduce cycles between the
+ // load node and any other user of the pointer computation nodes. That can
+ // happen if the load node uses any of said other users.
+ // In other words: we can only do this transformation if none of the other
+ // uses of the pointer computation to be folded are predecessors of the load
+ // we're folding into.
+ //
+ // Visited is a cache containing nodes that are known predecessors of N.
+ // Worklist is the set of nodes we're looking for predecessors of.
+ // For the first lookup, that only contains the load node N. Each call to
+ // hasPredecessorHelper adds any of the potential predecessors of N to the
+ // Worklist.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+ for (SDNode *U : Ptr.getNode()->users())
+ if (SDNode::hasPredecessorHelper(U, Visited, Worklist, /*Max=*/32,
+ /*TopologicalPrune=*/true))
+ return false;
+ }
+
+ // We have 2 main isel alternatives:
+ // - LDRAA/LDRAB, writeback or indexed. Zero disc, small offsets, D key.
+ // - LDRA/LDRApre. Pointer needs to be in X16.
+ SDLoc DL(N);
+ MachineSDNode *Res = nullptr;
+ SDValue Writeback, ResVal, OutChain;
+
+ // If the discriminator is zero and the offset fits, we can use LDRAA/LDRAB.
+ // Do that here to avoid needlessly constraining regalloc into using X16.
+ if (ZeroDisc && isShiftedInt<10, 3>(OffsetVal) && IsDKey) {
+ unsigned Opc = 0;
+ switch (KeyC) {
+ case AArch64PACKey::DA:
+ Opc = needsWriteback ? AArch64::LDRAAwriteback : AArch64::LDRAAindexed;
+ break;
+ case AArch64PACKey::DB:
+ Opc = needsWriteback ? AArch64::LDRABwriteback : AArch64::LDRABindexed;
+ break;
+ default:
+ llvm_unreachable("Invalid key for LDRAA/LDRAB");
+ }
+ // The offset is encoded as scaled, for an element size of 8 bytes.
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal / 8, DL, MVT::i64);
+ SDValue Ops[] = {Base, Offset, Chain};
+ Res = needsWriteback
+ ? CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::i64, MVT::Other,
+ Ops)
+ : CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::Other, Ops);
+ if (needsWriteback) {
+ Writeback = SDValue(Res, 0);
+ ResVal = SDValue(Res, 1);
+ OutChain = SDValue(Res, 2);
+ } else {
+ ResVal = SDValue(Res, 0);
+ OutChain = SDValue(Res, 1);
+ }
+ } else {
+ // Otherwise, use the generalized LDRA pseudos.
+ unsigned Opc = needsWriteback ? AArch64::LDRApre : AArch64::LDRA;
+
+ SDValue X16Copy =
+ CurDAG->getCopyToReg(Chain, DL, AArch64::X16, Base, SDValue());
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal, DL, MVT::i64);
+ SDValue Key = CurDAG->getTargetConstant(KeyC, DL, MVT::i32);
+ SDValue Ops[] = {Offset, Key, IntDisc, AddrDisc, X16Copy.getValue(1)};
+ Res = CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::Other, MVT::Glue, Ops);
+ if (needsWriteback)
+ Writeback = CurDAG->getCopyFromReg(SDValue(Res, 1), DL, AArch64::X16,
+ MVT::i64, SDValue(Res, 2));
+ ResVal = SDValue(Res, 0);
+ OutChain = SDValue(Res, 1);
----------------
atrosinenko wrote:
Do I understand correctly that, strictly speaking, `OutChain` should be `Writeback.getValue(1)` (the chain result of the CopyFromReg) when `needsWriteback` is set? As written, `Writeback` is glued to LDRApre and is therefore scheduled immediately after it, while all other chain successors are simply ordered after LDRApre, so everything is still scheduled correctly without further complicating the computation of `OutChain`?
https://github.com/llvm/llvm-project/pull/123769
More information about the llvm-commits mailing list