[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
Sander de Smalen via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 15 07:56:40 PST 2023
================
@@ -4850,6 +4852,93 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
Mask);
}
+// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+// Case 1: If the vector number (vecnum) is an immediate in range (0-15), it
+// gets folded into the instruction.
+// ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
+// Case 2: If the vecnum is not an immediate, then it is used to modify the base
+// and tile slice registers
+// ldr(%tileslice, %ptr, %vecnum)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * %vecnum
+// %tileslice2 = %tileslice + %vecnum
+// ldr [%tileslice2, 0], [%ptr2, 0]
+// Case 3: If the vecnum is an immediate out of range, then the same is done as
+// case 2, but the base and slice registers are modified by the greatest
+// multiple of 16 not exceeding the vecnum and the remainder is folded into the
+// instruction. This means that successive loads and stores that are offset from
+// each other can share the same base and slice register updates.
+// ldr(%tileslice, %ptr, 22)
+// ldr(%tileslice, %ptr, 23)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * 16
+// %tileslice2 = %tileslice + 16
+// ldr [%tileslice2, 6], [%ptr2, 6]
+// ldr [%tileslice2, 7], [%ptr2, 7]
+// Case 4: If the vecnum is an add of an immediate, then the non-immediate
+// operand is used to modify the base and tile slice registers, like case 2,
+// and the immediate is folded into the instruction.
+// ldr(%tileslice, %ptr, %vecnum + 7)
+// ldr(%tileslice, %ptr, %vecnum + 8)
+// ->
+// %svl = rdsvl
+// %ptr2 = %ptr + %svl * %vecnum
+// %tileslice2 = %tileslice + %vecnum
+// ldr [%tileslice2, 7], [%ptr2, 7]
+// ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 5: The vecnum being an add of an immediate out of range is also handled,
+// in which case the same remainder logic as case 3 is used.
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+ SDLoc DL(N);
+
+ SDValue TileSlice = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue VecNum = N->getOperand(4);
+ int32_t ConstAddend = 0;
+ SDValue VarAddend = VecNum;
+
+ // If the vnum is an add of an immediate, we can fold it into the instruction
+ if (VecNum.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(VecNum.getOperand(1))) {
+ ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
+ VarAddend = VecNum.getOperand(0);
+ } else if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
+ ConstAddend = ImmNode->getSExtValue();
+ VarAddend = SDValue();
+ }
+
+ int32_t ImmAddend = ConstAddend % 16;
+ if (int32_t C = (ConstAddend - ImmAddend)) {
+ SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32);
+ VarAddend = VarAddend
+ ? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal})
+ : CVal;
+ }
+
+ if (VarAddend) {
+ // Get the vector length that will be multiplied by vnum
+ auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ // Multiply SVL and vnum then add it to the base
+ SDValue Mul = DAG.getNode(
+ ISD::MUL, DL, MVT::i64,
+ {SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)});
+ Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul});
+ // Just add vnum to the tileslice
+ TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend});
+ }
+
+ SmallVector<SDValue, 4> Ops = {
+ /*Chain=*/N.getOperand(0), TileSlice, Base,
+ DAG.getTargetConstant(ImmAddend, DL, MVT::i32)};
----------------
sdesmalen-arm wrote:
nit: you might as well inline Ops into the expression below.
https://github.com/llvm/llvm-project/pull/68565
More information about the cfe-commits
mailing list