[llvm] [LegalizeTypes] Handle non byte-sized elt types when splitting INSERT/EXTRACT_VECTOR_ELT (PR #93357)

Björn Pettersson via llvm-commits llvm-commits at lists.llvm.org
Fri May 24 17:00:48 PDT 2024


https://github.com/bjope created https://github.com/llvm/llvm-project/pull/93357

DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT and DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT did not handle non byte-sized elements properly. In fact, they only dealt with elements smaller than 8 bits (as well as byte-sized elements).

This patch generalizes the support for non byte-sized elements by always extending the vector elements to match the store size for the element type when legalizing via a stack temporary. This should make sure that we can access a single element via a simple byte-addressed scalar load/store.

>From 08cad581538ea979a24145e127a6d2e8cbeee01b Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Sat, 25 May 2024 01:35:12 +0200
Subject: [PATCH] [LegalizeTypes] Handle non byte-sized elt types when
 splitting INSERT/EXTRACT_VECTOR_ELT

DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT and
DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT did not handle
non byte-sized elements properly. In fact, they only dealt with
elements smaller than 8 bits (as well as byte-sized elements).

This patch generalizes the support for non byte-sized elements by
always extending the vector elements to match the store size for
the element type when legalizing via a stack temporary. This should
make sure that we can access a single element via a simple
byte-addressed scalar load/store.
---
 llvm/include/llvm/CodeGen/ValueTypes.h        |  9 +++
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 16 ++---
 .../CodeGen/X86/legalize-ins-ext-vec-elt.ll   | 60 +++++++++++++++++++
 3 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/legalize-ins-ext-vec-elt.ll

diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h
index dab6c421bf6e6..9ad582c8ab7a9 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -432,6 +432,15 @@ namespace llvm {
       return EVT::getVectorVT(Context, EltVT, getVectorElementCount());
     }
 
+    /// Return a VT for an integer vector type with the size of the elements
+    /// extended to the store size. The type returned may be an extended
+    /// type.
+    EVT getStoreSizedIntegerVectorElementType(LLVMContext &Context) const {
+      EVT EltVT = getVectorElementType();
+      EltVT = EVT::getIntegerVT(Context, EltVT.getStoreSizeInBits());
+      return EVT::getVectorVT(Context, EltVT, getVectorElementCount());
+    }
+
     // Return a VT for a vector type with the same element type but
     // half the number of elements. The type returned may be an
     // extended type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 40e621f0db220..28ecd229bf398 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1832,10 +1832,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   // Make the vector elements byte-addressable if they aren't already.
   EVT VecVT = Vec.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
-  if (VecVT.getScalarSizeInBits() < 8) {
-    EltVT = MVT::i8;
-    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                             VecVT.getVectorElementCount());
+  if (!EltVT.isByteSized()) {
+    VecVT = VecVT.getStoreSizedIntegerVectorElementType(*DAG.getContext());
+    EltVT = VecVT.getVectorElementType();
     Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
     // Extend the element type to match if needed.
     if (EltVT.bitsGT(Elt.getValueType()))
@@ -3443,10 +3442,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   // Make the vector elements byte-addressable if they aren't already.
   SDLoc dl(N);
   EVT EltVT = VecVT.getVectorElementType();
-  if (VecVT.getScalarSizeInBits() < 8) {
-    EltVT = MVT::i8;
-    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                             VecVT.getVectorElementCount());
+  if (!EltVT.isByteSized()) {
+    VecVT = VecVT.getStoreSizedIntegerVectorElementType(*DAG.getContext());
+    EltVT = VecVT.getVectorElementType();
     Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
   }
 
@@ -3465,8 +3463,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   // Load back the required element.
   StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
 
-  // FIXME: This is to handle i1 vectors with elements promoted to i8.
-  // i1 vector handling needs general improvement.
   if (N->getValueType(0).bitsLT(EltVT)) {
     SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
       MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
diff --git a/llvm/test/CodeGen/X86/legalize-ins-ext-vec-elt.ll b/llvm/test/CodeGen/X86/legalize-ins-ext-vec-elt.ll
new file mode 100644
index 0000000000000..7b517c2ca574f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/legalize-ins-ext-vec-elt.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-- -o - %s| FileCheck %s
+
+; Verify that we support non byte-sized elements, together with variable index.
+
+define void @Legalize_SplitVectorResult_insert_i28(i28 %elt, i16 %idx, ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: Legalize_SplitVectorResult_insert_i28:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $7, %esi
+; CHECK-NEXT:    movl %edi, -40(%rsp,%rsi,4)
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [268435455,268435455,268435455,268435455]
+; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    andps %xmm0, %xmm1
+; CHECK-NEXT:    andps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT:    movaps %xmm0, 16(%rcx)
+; CHECK-NEXT:    movaps %xmm1, (%rcx)
+; CHECK-NEXT:    retq
+  %vec1 = insertelement <8 x i28> zeroinitializer, i28 %elt, i16 %idx
+  %vec2 = zext <8 x i28> %vec1 to <8 x i32>
+  store <8 x i32> %vec2, ptr %p2
+  ret void
+}
+
+define void @Legalize_SplitVectorResult_extract_i12(i16 %idx, ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: Legalize_SplitVectorResult_extract_i12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    movaps (%rsi), %xmm0
+; CHECK-NEXT:    movaps 16(%rsi), %xmm1
+; CHECK-NEXT:    movaps 32(%rsi), %xmm2
+; CHECK-NEXT:    movaps 48(%rsi), %xmm3
+; CHECK-NEXT:    movaps 64(%rsi), %xmm4
+; CHECK-NEXT:    movaps 80(%rsi), %xmm5
+; CHECK-NEXT:    movaps 96(%rsi), %xmm6
+; CHECK-NEXT:    movaps 112(%rsi), %xmm7
+; CHECK-NEXT:    movaps %xmm7, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm6, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm5, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $63, %edi
+; CHECK-NEXT:    movzwl -128(%rsp,%rdi,2), %eax
+; CHECK-NEXT:    andl $4095, %eax # imm = 0xFFF
+; CHECK-NEXT:    movw %ax, (%rdx)
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %vec = load <64 x i16>, ptr %p1
+  %trunc = trunc <64 x i16> %vec to <64 x i12>
+  %elt = extractelement <64 x i12> %trunc, i16 %idx
+  store i12 %elt, ptr %p2
+  ret void
+}



More information about the llvm-commits mailing list