[llvm] [SelectionDAG] Use unaligned store/load to move AVX registers onto stack for `insertelement` (PR #82130)
Manish Kausik H via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 09:40:28 PDT 2024
https://github.com/Nirhar updated https://github.com/llvm/llvm-project/pull/82130
>From caca20a97464162d706308eb242b798fbef71b4f Mon Sep 17 00:00:00 2001
From: Manish Kausik H <hmamishkausik at gmail.com>
Date: Tue, 16 Jul 2024 18:05:48 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store/load to move AVX registers
onto stack for `insertelement`
Prior to this patch, SelectionDAG generated aligned moves onto the stack for AVX registers
when the function was marked as a no-realign-stack function. This led to a mismatch
between the stack's alignment and the alignment required by the generated instruction. This patch fixes the issue. There was
a similar issue reported for `extractelement`, which was fixed in commit a6614ec5b7c1dbfc4b847884c5de780cf75e8e9c
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 27 ++++++++++---------
...ligned-insert-into-vector-through-stack.ll | 18 +++++++++++++
2 files changed, 33 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 9f515739ee048..68e0cc4b69263 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1469,12 +1469,11 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
EVT VecVT = Vec.getValueType();
EVT PartVT = Part.getValueType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ MachineMemOperand *AlignedMMO = getStackAlignedMMO(
+ StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
// First store the whole vector.
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, AlignedMMO);
// Freeze the index so we don't poison the clamping code we're about to emit.
Idx = DAG.getFreeze(Idx);
@@ -1485,22 +1484,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx);
// Store the subvector.
- Ch = DAG.getStore(
- Ch, dl, Part, SubStackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, AlignedMMO);
} else {
SDValue SubStackPtr =
TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
// Store the scalar value.
- Ch = DAG.getTruncStore(
- Ch, dl, Part, SubStackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
- VecVT.getVectorElementType());
+ Ch = DAG.getTruncStore(Ch, dl, Part, SubStackPtr,
+ VecVT.getVectorElementType(), AlignedMMO);
}
+ Align ElementAlignment =
+ std::min(cast<StoreSDNode>(Ch)->getAlign(),
+ DAG.getSubtarget().getFrameLowering()->getStackAlign());
+
// Finally, load the updated vector.
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ ElementAlignment);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
diff --git a/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll b/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll
new file mode 100644
index 0000000000000..01e4d02acda18
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-insert-into-vector-through-stack.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
+; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %a = insertelement <8 x i32> %arg1, i32 42, i32 %n
+ ret <8 x i32> %a
+}
+
+attributes #0 = { "no-realign-stack" "target-cpu"="haswell" }
More information about the llvm-commits
mailing list