[llvm] [SelectionDAG] Use unaligned store/load to move AVX registers onto stack for `insertelement` (PR #82130)

Manish Kausik H via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 17 12:56:04 PST 2024


https://github.com/Nirhar created https://github.com/llvm/llvm-project/pull/82130

Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers when the function was marked as a no-realign-stack function. This led to a mismatch between the actual alignment of the stack slot and the alignment assumed by the generated instruction. This patch fixes the issue. A similar issue reported for `extractelement` was fixed in commit a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c.

>From 2f0989e6026dab7e2cd21d539895b5efaac4ec99 Mon Sep 17 00:00:00 2001
From: Nirhar <hmanishkausik at gmail.com>
Date: Sun, 18 Feb 2024 01:34:51 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store/load to move AVX registers
 onto stack for `insertelement`

Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX
registers when the function was marked as a no-realign-stack function. This led to a
mismatch between the actual alignment of the stack slot and the alignment assumed by
the generated instruction. This patch fixes the issue. A similar issue reported for
`extractelement` was fixed in commit a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 67 ++++++++++---------
 ...ligned-insert-into-vector-through-stack.ll | 18 +++++
 2 files changed, 53 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 892bfbd62f0d02..e58adf867ac790 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -363,6 +363,19 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
   return Result;
 }
 
+// Helper function that generates an MMO that considers the alignment of the
+// stack, and the size of the stack object
+static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
+                                             MachineFunction &MF,
+                                             bool isObjectScalable) {
+  auto &MFI = MF.getFrameInfo();
+  int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+  uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
+  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                 ObjectSize, MFI.getObjectAlign(FI));
+}
+
 /// Some target cannot handle a variable insertion index for the
 /// INSERT_VECTOR_ELT instruction.  In this case, it
 /// is necessary to spill the vector being inserted into to memory, perform
@@ -384,23 +397,23 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
   EVT VT    = Tmp1.getValueType();
   EVT EltVT = VT.getVectorElementType();
   SDValue StackPtr = DAG.CreateStackTemporary(VT);
-
-  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachineMemOperand *AlignedMMO = getStackAlignedMMO(
+      StackPtr, DAG.getMachineFunction(), EltVT.isScalableVector());
 
   // Store the vector.
-  SDValue Ch = DAG.getStore(
-      DAG.getEntryNode(), dl, Tmp1, StackPtr,
-      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
 
   SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
 
   // Store the scalar value.
-  Ch = DAG.getTruncStore(
-      Ch, dl, Tmp2, StackPtr2,
-      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
-  // Load the updated vector.
-  return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
-                                               DAG.getMachineFunction(), SPFI));
+  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, EltVT, AlignedMMO);
+
+  Align ElementAlignment = std::min(cast<StoreSDNode>(Ch)->getAlign(),
+                                    DAG.getDataLayout().getPrefTypeAlign(
+                                        VT.getTypeForEVT(*DAG.getContext())));
+
+  return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo(),
+                     ElementAlignment);
 }
 
 SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
@@ -1378,19 +1391,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   }
 }
 
-// Helper function that generates an MMO that considers the alignment of the
-// stack, and the size of the stack object
-static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
-                                             MachineFunction &MF,
-                                             bool isObjectScalable) {
-  auto &MFI = MF.getFrameInfo();
-  int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
-  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
-  uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
-  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                 ObjectSize, MFI.getObjectAlign(FI));
-}
-
 SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   SDValue Vec = Op.getOperand(0);
   SDValue Idx = Op.getOperand(1);
@@ -1488,24 +1488,27 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
   EVT VecVT = Vec.getValueType();
   EVT SubVecVT = Part.getValueType();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-  MachinePointerInfo PtrInfo =
-      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+  MachineMemOperand *AlignedMMO = getStackAlignedMMO(
+      StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
 
   // First store the whole vector.
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
 
   // Then store the inserted part.
   SDValue SubStackPtr =
       TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
 
   // Store the subvector.
-  Ch = DAG.getStore(
-      Ch, dl, Part, SubStackPtr,
-      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+  Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, AlignedMMO);
+
+  Align ElementAlignment =
+      std::min(cast<StoreSDNode>(Ch)->getAlign(),
+               DAG.getDataLayout().getPrefTypeAlign(
+                   Op.getValueType().getTypeForEVT(*DAG.getContext())));
 
   // Finally, load the updated vector.
-  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo(),
+                     ElementAlignment);
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
diff --git a/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll b/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll
new file mode 100644
index 00000000000000..01e4d02acda18e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $7, %edi
+; CHECK-NEXT:    movl $42, -40(%rsp,%rdi,4)
+; CHECK-NEXT:    vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT:    retq
+entry:
+  %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+  ret <8 x i32> %a
+}
+
+attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }



More information about the llvm-commits mailing list