[llvm] [SelectionDAG] Use unaligned store to move AVX registers onto stack for `extractelement` (PR #78422)

Manish Kausik H via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 22:33:27 PST 2024


https://github.com/Nirhar updated https://github.com/llvm/llvm-project/pull/78422

>From f2b0b1b7f5369850a58f9e26d392ba840fb9c0af Mon Sep 17 00:00:00 2001
From: Nirhar <hmanishkausik at gmail.com>
Date: Wed, 17 Jan 2024 15:46:06 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store to move AVX registers onto
 stack for `extractelement`

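Prior to this patch, SelectionDAG generated an aligned move onto the stack for AVX
registers even when the function was marked as a no-realign-stack function. This led to
a mismatch between the actual stack alignment and the alignment the generated instruction
assumed. This patch fixes the issue by building the store's memory operand from the
alignment of the stack object itself.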

Fixes #77730
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 21 +++++++++++++++++--
 .../AArch64/sve-extract-fixed-vector.ll       |  2 +-
 ...igned_extract_from_vector_through_stack.ll | 20 ++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index adfeea073bff6..568cdd9cfd59f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -1377,6 +1378,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   }
 }
 
+// Helper function that generates an MMO that considers the alignment of the
+// stack, and the size of the stack object. A scalable object is given an
+// all-ones 64-bit size (presumably the "unknown size" marker; confirm against
+// MachineMemOperand) instead of the frame object's recorded size.
+static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
+                                             MachineFunction &MF,
+                                             bool isObjectScalable) {
+  auto &MFI = MF.getFrameInfo();
+  int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+  uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
+  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                 ObjectSize, MFI.getObjectAlign(FI));
+}
+
 SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   SDValue Vec = Op.getOperand(0);
   SDValue Idx = Op.getOperand(1);
@@ -1426,8 +1442,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   if (!Ch.getNode()) {
     // Store the value to a temporary stack slot, then LOAD the returned part.
     StackPtr = DAG.CreateStackTemporary(VecVT);
-    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
-                      MachinePointerInfo());
+    MachineMemOperand *StoreMMO =
+        getStackAlignedMMO(StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
+    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
   }
 
   SDValue NewLoad;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index d2cbbe0628f0f..2c2bb0af84501 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK
 
 ; Should codegen to a nop, since idx is zero.
diff --git a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
new file mode 100644
index 0000000000000..52d0c2b509128
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define i32 @foo(i32 %arg1) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $31, %edi
+; CHECK-NEXT:    movzbl -40(%rsp,%rdi), %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = extractelement <32 x i8> zeroinitializer, i32 %arg1
+  %b = zext i8 %a to i32
+  ret i32 %b
+}
+
+attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }



More information about the llvm-commits mailing list