[llvm] a768bc6 - [SelectionDAG] Use unaligned store to move AVX registers onto stack for `extractelement` (#78422)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 09:19:35 PST 2024
Author: Manish Kausik H
Date: 2024-02-02T22:49:31+05:30
New Revision: a768bc6ef6a0c1a7365134505fdfcaeeaaffdb41
URL: https://github.com/llvm/llvm-project/commit/a768bc6ef6a0c1a7365134505fdfcaeeaaffdb41
DIFF: https://github.com/llvm/llvm-project/commit/a768bc6ef6a0c1a7365134505fdfcaeeaaffdb41.diff
LOG: [SelectionDAG] Use unaligned store to move AVX registers onto stack for `extractelement` (#78422)
Prior to this patch, SelectionDAG generated an aligned move onto the stack for
AVX registers when the function was marked as a no-realign-stack
function. This led to a mismatch between the actual alignment of the stack
slot and the alignment assumed by the generated instruction. This patch fixes the issue.
Fixes #77730
Added:
llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d29e44f95798c..472b4a2d439a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -1377,6 +1378,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
+// Helper function that generates an MMO that considers the alignment of the
+// stack, and the size of the stack object.
+//
+// StackPtr is expected to be a frame index: it is cast<>ed to
+// FrameIndexSDNode to obtain the index of the stack slot. The returned
+// MachineMemOperand describes a store (MOStore) to that fixed stack slot and
+// carries the alignment that MachineFrameInfo records for the slot, rather
+// than an alignment derived from the stored value's type. Its size is the
+// slot's object size, or ~UINT64_C(0) when isObjectScalable is true
+// (presumably the "unknown size" sentinel for scalable vectors -- confirm).
+static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
+ MachineFunction &MF,
+ bool isObjectScalable) {
+ auto &MFI = MF.getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, ObjectSize, MFI.getObjectAlign(FI));
+
+ return MMO;
+}
+
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
@@ -1426,8 +1442,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (!Ch.getNode()) {
// Store the value to a temporary stack slot, then LOAD the returned part.
StackPtr = DAG.CreateStackTemporary(VecVT);
- Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo());
+ MachineMemOperand *StoreMMO = getStackAlignedMMO(
+ StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
}
SDValue NewLoad;
diff --git a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
new file mode 100644
index 0000000000000..52d0c2b509128
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define i32 @foo(i32 %arg1) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: andl $31, %edi
+; CHECK-NEXT: movzbl -40(%rsp,%rdi), %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = extractelement <32 x i8> zeroinitializer, i32 %arg1 ; variable index: per the expected output above, the vector is stored to the stack with an unaligned vmovups and the selected byte is loaded back
+ %b = zext i8 %a to i32
+ ret i32 %b
+}
+
+attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }
More information about the llvm-commits
mailing list