[llvm] [X86] Fix arithmetic error in extractVector (PR #128052)

Daniel Zabawa via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 11:24:13 PST 2025


https://github.com/daniel-zabawa updated https://github.com/llvm/llvm-project/pull/128052

>From d5cfde70d4037c25704387d3abf8bdc24101d4ea Mon Sep 17 00:00:00 2001
From: "Zabawa, Daniel" <daniel.zabawa at intel.com>
Date: Thu, 20 Feb 2025 11:09:40 -0800
Subject: [PATCH 1/2] [X86] Fix arithmetic error in extractVector

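The computation of the element count for the result VT in
extractSubVector is incorrect when vectorWidth does not divide
VT.getSizeInBits(), which can occur when the source vector element count
is not a power of two, e.g. extracting a 256-bit subvector
(vectorWidth = 256) from a 384-bit source. In that case the integer
division VT.getSizeInBits() / vectorWidth truncates to 1, so the result
VT ends up with as many elements as the source instead of 256 bits'
worth.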

This rewrites the expression as
(VT.getVectorNumElements() * vectorWidth) / VT.getSizeInBits(), so the
division is exact given that vectorWidth is a multiple of the source
element size.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1c9d43ce4c062..b7a49bf3e32f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4066,6 +4066,10 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                 const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
   EVT ElVT = VT.getVectorElementType();
+  unsigned ResultNumElts =
+      (VT.getVectorNumElements() * vectorWidth) / VT.getSizeInBits();
+  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, ResultNumElts);
+
   unsigned Factor = VT.getSizeInBits() / vectorWidth;
   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                   VT.getVectorNumElements() / Factor);

>From 9195c2b056fbec69a782e458e4c89093fa813b70 Mon Sep 17 00:00:00 2001
From: "Zabawa, Daniel" <daniel.zabawa at intel.com>
Date: Thu, 20 Feb 2025 11:23:55 -0800
Subject: [PATCH 2/2] add unit test

---
 llvm/test/CodeGen/X86/pr128052.ll | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr128052.ll

diff --git a/llvm/test/CodeGen/X86/pr128052.ll b/llvm/test/CodeGen/X86/pr128052.ll
new file mode 100644
index 0000000000000..1a67e64b69832
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr128052.ll
@@ -0,0 +1,30 @@
+; Ensure assertion is not hit when folding concat of two contiguous extract_subvector operations
+; from a source with a non-power-of-two vector length.
+; RUN: llc -mattr=+avx2 < %s
+
+source_filename = "foo.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(ptr noundef %pDst, ptr noundef %pSrc) {
+bb0:
+  %sptr1 = getelementptr i8, ptr %pSrc, i64 32
+  %load598 = load <12 x float>, ptr %sptr1, align 1
+  br label %bb1
+bb1:
+  %sptr0 = getelementptr i8, ptr %pSrc, i64 16
+  %load617 = load <12 x float>, ptr %sptr0, align 1
+  %42 = fsub contract <12 x float> %load617, %load598
+  %43 = shufflevector <12 x float> %42, <12 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %44 = fsub contract <12 x float> %load617, %load598
+  %45 = shufflevector <12 x float> %44, <12 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %46 = fsub contract <12 x float> %load617, %load598
+  %47 = shufflevector <12 x float> %46, <12 x float> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %dptr0 = getelementptr i8, ptr %pDst, i64 16
+  %dptr1 = getelementptr i8, ptr %pDst, i64 32 
+  %dptr2 = getelementptr i8, ptr %pDst, i64 48
+  store <4 x float> %43, ptr %dptr0, align 1
+  store <4 x float> %45, ptr %dptr1, align 1
+  store <4 x float> %47, ptr %dptr2, align 1
+  ret void
+}



More information about the llvm-commits mailing list