[llvm] d5148f0 - [X86] Fix arithmetic error in extractVector (#128052)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 05:39:32 PST 2025


Author: Daniel Zabawa
Date: 2025-02-24T21:39:28+08:00
New Revision: d5148f000a9213d5e64f49c67d0e861e8b303d92

URL: https://github.com/llvm/llvm-project/commit/d5148f000a9213d5e64f49c67d0e861e8b303d92
DIFF: https://github.com/llvm/llvm-project/commit/d5148f000a9213d5e64f49c67d0e861e8b303d92.diff

LOG: [X86] Fix arithmetic error in extractVector (#128052)

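The computation of the element count for the result VT in
extractSubVector is incorrect when vectorWidth does not evenly divide
VT.getSizeInBits(), which can occur when the source vector element count
is not a power of two, e.g. extracting a 256-bit subvector
(vectorWidth = 256) from a 384-bit source. In that case the old Factor
truncates to 384 / 256 = 1, so the result VT keeps the full source
element count instead of the number of elements that fit in vectorWidth.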

This rewrites the expression to multiply the source element count by
vectorWidth before dividing by VT.getSizeInBits(), so the division is
exact given that vectorWidth is a multiple of the source element size.

Added: 
    llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1883337e0ef3a..269becb696875 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4076,9 +4076,12 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                 const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
   EVT ElVT = VT.getVectorElementType();
-  unsigned Factor = VT.getSizeInBits() / vectorWidth;
-  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
-                                  VT.getVectorNumElements() / Factor);
+  unsigned ResultNumElts =
+      (VT.getVectorNumElements() * vectorWidth) / VT.getSizeInBits();
+  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, ResultNumElts);
+
+  assert(ResultVT.getSizeInBits() == vectorWidth &&
+         "Illegal subvector extraction");
 
   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();

diff  --git a/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll b/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll
new file mode 100644
index 0000000000000..d699b1a182845
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; Ensure assertion is not hit when folding concat of two contiguous extract_subvector operations
+; from a source with a non-power-of-two vector length.
+; RUN: llc -mtriple=x86_64 -mattr=+avx2 < %s | FileCheck %s
+
+define void @foo(ptr %pDst) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, 16(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %0 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %2 = getelementptr i8, ptr %pDst, i64 16
+  %3 = getelementptr i8, ptr %pDst, i64 32
+  store <4 x float> %0, ptr %2, align 1
+  store <4 x float> %1, ptr %3, align 1
+  ret void
+}


        


More information about the llvm-commits mailing list