[llvm] [SelectionDAG] Use unaligned store to legalize `EXTRACT_VECTOR_ELT` type (PR #98176)

Manish Kausik H via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 28 22:25:37 PDT 2024


https://github.com/Nirhar updated https://github.com/llvm/llvm-project/pull/98176

>From 0d4798d546bc468277afa28e94920224a310a40a Mon Sep 17 00:00:00 2001
From: Manish Kausik H <hmamishkausik at gmail.com>
Date: Mon, 29 Jul 2024 10:38:44 +0530
Subject: [PATCH] [SelectionDAG] Use unaligned store to legalize
 `EXTRACT_VECTOR_ELT` type when Stack is non-realignable

This patch sets the alignment of store instructions generated during type
legalization of extractelement instruction, after considering stack
alignment, if the stack is not realignable.

Fixes #98044
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  5 +++++
 ...ligned_extract_from_vector_through_stack.ll | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 79f90bae1d8d6..b576f4d82201e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2481,6 +2481,11 @@ Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
     Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
     if (RedAlign2 < RedAlign)
       RedAlign = RedAlign2;
+
+    if (!getMachineFunction().getFrameInfo().isStackRealignable())
+      // If the stack is not realignable, the alignment should be limited to the
+      // StackAlignment
+      RedAlign = std::min(RedAlign, StackAlign);
   }
 
   return RedAlign;
diff --git a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
index 52d0c2b509128..629f44b52bc05 100644
--- a/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
+++ b/llvm/test/CodeGen/X86/unaligned_extract_from_vector_through_stack.ll
@@ -17,4 +17,22 @@ entry:
   ret i32 %b
 }
 
+define i32 @foo2(i32 %arg1) #1 {
+; CHECK-LABEL: foo2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $31, %edi
+; CHECK-NEXT:    movzwl -72(%rsp,%rdi,2), %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = extractelement <32 x i16> zeroinitializer, i32 %arg1
+  %b = zext i16 %a to i32
+  ret i32 %b
+}
+
 attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }
+attributes #1 = { "no-realign-stack" "target-cpu"="skylake" }



More information about the llvm-commits mailing list