[llvm] [SLP] Reject duplicate shift amounts in matchesShlZExt reorder path (PR #183627)
Akash Dutta via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 27 08:14:06 PST 2026
https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/183627
>From e320929357c2cd68ad2a490632f25083b13ffbea Mon Sep 17 00:00:00 2001
From: Akash Dutta <Akash.Dutta at amd.com>
Date: Thu, 26 Feb 2026 16:05:09 -0600
Subject: [PATCH 1/2] duplicate shift check zext bitcast reorder
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 5 ++
.../AMDGPU/zext-duplicate-shift.ll | 59 +++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/zext-duplicate-shift.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3a10d2b2a7158..7ec3a341d01e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13420,6 +13420,8 @@ bool BoUpSLP::matchesShlZExt(const TreeEntry &TE, OrdersType &Order,
} else {
const unsigned VF = RhsTE->getVectorFactor();
Order.assign(VF, VF);
+ // Track which logical positions we've seen; reject duplicate shift amounts.
+ SmallBitVector SeenPositions(VF);
// Check if need to reorder Rhs to make it in form (0, Stride, 2 * Stride,
// ..., Sz-Stride).
if (VF * Stride != Sz)
@@ -13434,6 +13436,9 @@ bool BoUpSLP::matchesShlZExt(const TreeEntry &TE, OrdersType &Order,
if (Val.isNegative() || Val.uge(Sz) || Val.getZExtValue() % Stride != 0)
return false;
unsigned Pos = Val.getZExtValue() / Stride;
+ if (SeenPositions.test(Pos))
+ return false;
+ SeenPositions.set(Pos);
// TODO: Support Pos >= VF, in this case need to shift the final value.
if (Order[Idx] != VF || Pos >= VF)
return false;
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/zext-duplicate-shift.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/zext-duplicate-shift.ll
new file mode 100644
index 0000000000000..a87e5441a6431
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/zext-duplicate-shift.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -S -o - < %s | FileCheck --implicit-check-not='<i32 2, i32 3, i32 poison, i32 poison>' %s
+
+define void @duplicate_shift_i128_store(ptr %base, i32 %spec_select, i32 %spec_select37, i1 %narrow) {
+; CHECK-LABEL: define void @duplicate_shift_i128_store(
+; CHECK-SAME: ptr [[BASE:%.*]], i32 [[SPEC_SELECT:%.*]], i32 [[SPEC_SELECT37:%.*]], i1 [[NARROW:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 1024
+; CHECK-NEXT: [[P_INSERT_11:%.*]] = select i1 [[NARROW]], i128 158456325046975419260797452288, i128 158456325028528675187087900672
+; CHECK-NEXT: [[ZEXT37:%.*]] = zext i32 [[SPEC_SELECT37]] to i128
+; CHECK-NEXT: [[SHL37:%.*]] = shl nuw nsw i128 [[ZEXT37]], 32
+; CHECK-NEXT: [[MASKED37:%.*]] = and i128 [[SHL37]], 18446742974197923840
+; CHECK-NEXT: [[AND37_255:%.*]] = and i32 [[SPEC_SELECT37]], 255
+; CHECK-NEXT: [[ZEXT37_255:%.*]] = zext nneg i32 [[AND37_255]] to i128
+; CHECK-NEXT: [[SHL37_255:%.*]] = shl nuw nsw i128 [[ZEXT37_255]], 32
+; CHECK-NEXT: [[P_MASK_7:%.*]] = or disjoint i128 [[P_INSERT_11]], [[MASKED37]]
+; CHECK-NEXT: [[P_INSERT_7:%.*]] = or disjoint i128 [[P_MASK_7]], [[SHL37_255]]
+; CHECK-NEXT: [[PREFIX_HIGH:%.*]] = and i32 [[SPEC_SELECT]], -16777216
+; CHECK-NEXT: [[ZEXT_HIGH:%.*]] = zext i32 [[PREFIX_HIGH]] to i128
+; CHECK-NEXT: [[P_INSERT_6:%.*]] = or disjoint i128 [[P_INSERT_7]], [[ZEXT_HIGH]]
+; CHECK-NEXT: [[AND_16711680:%.*]] = and i32 [[SPEC_SELECT]], 16711680
+; CHECK-NEXT: [[ZEXT_16711680:%.*]] = zext nneg i32 [[AND_16711680]] to i128
+; CHECK-NEXT: [[P_MASK_4:%.*]] = or disjoint i128 [[P_INSERT_6]], [[ZEXT_16711680]]
+; CHECK-NEXT: [[AND_255:%.*]] = and i32 [[SPEC_SELECT]], 255
+; CHECK-NEXT: [[ZEXT_255:%.*]] = zext nneg i32 [[AND_255]] to i128
+; CHECK-NEXT: [[AND_65280:%.*]] = and i32 [[SPEC_SELECT]], 65280
+; CHECK-NEXT: [[ZEXT_65280:%.*]] = zext nneg i32 [[AND_65280]] to i128
+; CHECK-NEXT: [[P_MASK_0:%.*]] = or disjoint i128 [[P_MASK_4]], [[ZEXT_65280]]
+; CHECK-NEXT: [[P_INSERT_0:%.*]] = or disjoint i128 [[P_MASK_0]], [[ZEXT_255]]
+; CHECK-NEXT: store i128 [[P_INSERT_0]], ptr [[ARRAYIDX]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %arrayidx = getelementptr inbounds i8, ptr %base, i64 1024
+ %p_insert_11 = select i1 %narrow, i128 158456325046975419260797452288, i128 158456325028528675187087900672
+ %zext37 = zext i32 %spec_select37 to i128
+ %shl37 = shl nuw nsw i128 %zext37, 32
+ %masked37 = and i128 %shl37, 18446742974197923840
+ %and37_255 = and i32 %spec_select37, 255
+ %zext37_255 = zext nneg i32 %and37_255 to i128
+ %shl37_255 = shl nuw nsw i128 %zext37_255, 32
+ %p_mask_7 = or disjoint i128 %p_insert_11, %masked37
+ %p_insert_7 = or disjoint i128 %p_mask_7, %shl37_255
+ %prefix_high = and i32 %spec_select, -16777216
+ %zext_high = zext i32 %prefix_high to i128
+ %p_insert_6 = or disjoint i128 %p_insert_7, %zext_high
+ %and_16711680 = and i32 %spec_select, 16711680
+ %zext_16711680 = zext nneg i32 %and_16711680 to i128
+ %p_mask_4 = or disjoint i128 %p_insert_6, %zext_16711680
+ %and_255 = and i32 %spec_select, 255
+ %zext_255 = zext nneg i32 %and_255 to i128
+ %and_65280 = and i32 %spec_select, 65280
+ %zext_65280 = zext nneg i32 %and_65280 to i128
+ %p_mask_0 = or disjoint i128 %p_mask_4, %zext_65280
+ %p_insert_0 = or disjoint i128 %p_mask_0, %zext_255
+ ; Generic store i128 (triggers SLP).
+ store i128 %p_insert_0, ptr %arrayidx, align 16
+ ret void
+}
>From 6a07f8806570de92f2e5b8b7b395ce35a608501f Mon Sep 17 00:00:00 2001
From: Akash Dutta <Akash.Dutta at amd.com>
Date: Thu, 26 Feb 2026 20:46:02 -0600
Subject: [PATCH 2/2] update seen positions tracker to SmallVector
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7ec3a341d01e0..883b00083f85c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13421,7 +13421,8 @@ bool BoUpSLP::matchesShlZExt(const TreeEntry &TE, OrdersType &Order,
const unsigned VF = RhsTE->getVectorFactor();
Order.assign(VF, VF);
// Track which logical positions we've seen; reject duplicate shift amounts.
- SmallBitVector SeenPositions(VF);
+ SmallVector<unsigned, 4> SeenPositions;
+ SeenPositions.assign(VF, VF);
// Check if need to reorder Rhs to make it in form (0, Stride, 2 * Stride,
// ..., Sz-Stride).
if (VF * Stride != Sz)
@@ -13436,9 +13437,9 @@ bool BoUpSLP::matchesShlZExt(const TreeEntry &TE, OrdersType &Order,
if (Val.isNegative() || Val.uge(Sz) || Val.getZExtValue() % Stride != 0)
return false;
unsigned Pos = Val.getZExtValue() / Stride;
- if (SeenPositions.test(Pos))
+ if (SeenPositions[Pos] != VF)
return false;
- SeenPositions.set(Pos);
+ SeenPositions[Pos] = Pos;
// TODO: Support Pos >= VF, in this case need to shift the final value.
if (Order[Idx] != VF || Pos >= VF)
return false;
More information about the llvm-commits mailing list