[llvm] [SelectionDAG] Use unaligned store to move AVX registers onto stack for `extractelement` (PR #78422)

Manish Kausik H via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 17 02:23:34 PST 2024


https://github.com/Nirhar created https://github.com/llvm/llvm-project/pull/78422

Prior to this patch, SelectionDAG generated an aligned move onto the stack for AVX registers when the function was marked as a no-realign-stack function. This led to misalignment between the stack and the generated instruction. This patch fixes the issue.

Fixes #77730

>From dce9660593ae4485f15fdc76721981b32de7a224 Mon Sep 17 00:00:00 2001
From: Nirhar <hmanishkausik at gmail.com>
Date: Wed, 17 Jan 2024 15:46:06 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store to move AVX registers onto
 stack for `extractelement`

Prior to this patch, SelectionDAG generated an aligned move onto the stack for AVX
registers when the function was marked as a no-realign-stack function. This led to
misalignment between the stack and the generated instruction. This patch fixes the issue.

Fixes #77730
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 14 +++++++++--
 ...igned_extract_from_vector_through_stack.ll | 23 +++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index adfeea073bff65..4de78f75bbb7df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -1425,9 +1426,18 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
 
   if (!Ch.getNode()) {
     // Store the value to a temporary stack slot, then LOAD the returned part.
+    auto &MF = DAG.getMachineFunction();
+    auto &MFI = MF.getFrameInfo();
     StackPtr = DAG.CreateStackTemporary(VecVT);
-    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
-                      MachinePointerInfo());
+    int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+    MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+    
+    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 
+                            MFI.getObjectSize(FI),
+                            MFI.getObjectAlign(FI));
+
+    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
   }
 
   SDValue NewLoad;
diff --git a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
new file mode 100644
index 00000000000000..70d92e0879c1e7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s  | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: %bb.0:   
+;CHECK-NEXT: # kill: def $edi killed $edi def $rdi                             
+;CHECK-NEXT: vxorps	%xmm0, %xmm0, %xmm0
+;CHECK-NEXT: vmovups	%ymm0, -40(%rsp)
+;CHECK-NEXT: andl	$31, %edi
+;CHECK-NEXT: movzbl	-40(%rsp,%rdi), %eax
+;CHECK-NEXT: vzeroupper
+;CHECK-NEXT: retq
+
+define i32 @foo(i32 %arg1) #0 {
+entry:
+  %a = extractelement <32 x i8> zeroinitializer, i32 %arg1
+  %b = zext i8 %a to i32
+  ret i32 %b
+}
+
+attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }
\ No newline at end of file



More information about the llvm-commits mailing list