[llvm] 96bbd35 - [AArch64][SVE] Only fold frame indexes referencing SVE objects into SVE loads/stores

Tue May 3 03:03:13 PDT 2022

Author: Bradley Smith
Date: 2022-05-03T09:48:13Z
New Revision: 96bbd359edbf8582fc2d29b57d7e65e54e98709b

URL: https://github.com/llvm/llvm-project/commit/96bbd359edbf8582fc2d29b57d7e65e54e98709b
DIFF: https://github.com/llvm/llvm-project/commit/96bbd359edbf8582fc2d29b57d7e65e54e98709b.diff

LOG: [AArch64][SVE] Only fold frame indexes referencing SVE objects into SVE loads/stores

Currently we always fold frame indexes into SVE load/store instructions,
however these instructions can only encode VL scaled offsets. This means
that when we are accessing a fixed length stack object with these
instructions, the folded in frame index gets pulled back out during frame
lowering. This can cause issues when we have no spare registers and no
emergency spill slot.

Rather than causing issues like this, don't fold in frame indexes that
reference fixed length objects.

Fixes: #55041

Differential Revision: https://reviews.llvm.org/D124457

Added: 
    llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c367d2db853d3..71911b6bc6145 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5092,12 +5092,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                    SDValue &OffImm) {
   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
   const DataLayout &DL = CurDAG->getDataLayout();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
 
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
-    return true;
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+      return true;
+    }
+
+    return false;
   }
 
   if (MemVT == EVT())
@@ -5124,7 +5131,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
   Base = N.getOperand(0);
   if (Base.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
   }
 
   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
new file mode 100644
index 0000000000000..da11e6b36f35d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure we don't crash by trying to fold fixed length frame indexes into
+; loads/stores that don't support an appropriate addressing mode, hence creating
+; too many extra vregs during frame lowering, when we don't have an emergency
+; spill slot.
+
+define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                             i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14,  i64* %v15, i64* %v16,
+                             i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                             i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
+                             i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                             i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
+                             i64 %v49) #0 {
+; CHECK-LABEL: func1
+  tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                        i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
+                        i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                        i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
+                        i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                        i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
+                        i64 undef)
+  ret void
+}
+
+declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64)
+
+attributes #0 = { "target-features"="+sve" vscale_range(2,2) }

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
new file mode 100644
index 0000000000000..9227c4caf0cd2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that only frame indexes with no offset are folded into SVE loads/stores
+; when accessing fixed width objects.
+define void @foo(<8 x i64>* %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       SelectionDAG has 14 nodes:
+; CHECK-NEXT:    t0: ch = EntryToken
+; CHECK-NEXT:    t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
+; CHECK-NEXT:    t2: i64,ch = CopyFromReg t0, Register:i64 %0
+; CHECK-NEXT:    t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
+; CHECK-NEXT:    t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
+; CHECK-NEXT:    t10: ch = RET_ReallyLR t16
+; CHECK-EMPTY:
+entry:
+  %r0 = alloca <8 x i64>
+  %r1 = alloca <8 x i64>
+  %r = load volatile <8 x i64>, <8 x i64>* %a
+  store volatile <8 x i64> %r, <8 x i64>* %r0
+  store volatile <8 x i64> %r, <8 x i64>* %r1
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) }