[llvm] Revert "AMDGPU: Handle demanded subvectors for readfirstlane" (PR #130276)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 04:14:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jan Patrick Lehr (jplehr)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->128648
Broke buildbots:
https://lab.llvm.org/buildbot/#/builders/10/builds/816
https://lab.llvm.org/buildbot/#/builders/140/builds/18458
and others.
---
Full diff: https://github.com/llvm/llvm-project/pull/130276.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+10-34)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll (+19-32)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ef076814ffdab..ebe740f884ea6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1574,59 +1574,35 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
const unsigned LastElt = DemandedElts.getActiveBits() - 1;
const unsigned MaskLen = LastElt - FirstElt + 1;
- unsigned OldNumElts = VT->getNumElements();
- if (MaskLen == OldNumElts && MaskLen != 1)
+ // TODO: Handle general subvector extract.
+ if (MaskLen != 1)
return nullptr;
Type *EltTy = VT->getElementType();
- Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
-
- // Theoretically we should support these intrinsics for any legal type. Avoid
- // introducing cases that aren't direct register types like v3i16.
- if (!isTypeLegal(NewVT))
+ if (!isTypeLegal(EltTy))
return nullptr;
Value *Src = II.getArgOperand(0);
+ assert(FirstElt == LastElt);
+ Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
// Make sure convergence tokens are preserved.
// TODO: CreateIntrinsic should allow directly copying bundles
SmallVector<OperandBundleDef, 2> OpBundles;
II.getOperandBundlesAsDefs(OpBundles);
Module *M = IC.Builder.GetInsertBlock()->getModule();
- Function *Remangled =
- Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
-
- if (MaskLen == 1) {
- Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
- // TODO: Preserve callsite attributes?
- CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
-
- return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
- NewCall, FirstElt);
- }
-
- SmallVector<int> ExtractMask(MaskLen, -1);
- for (unsigned I = 0; I != MaskLen; ++I) {
- if (DemandedElts[FirstElt + I])
- ExtractMask[I] = FirstElt + I;
- }
-
- Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
+ Function *Remangled = Intrinsic::getOrInsertDeclaration(
+ M, II.getIntrinsicID(), {Extract->getType()});
// TODO: Preserve callsite attributes?
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
- SmallVector<int> InsertMask(OldNumElts, -1);
- for (unsigned I = 0; I != MaskLen; ++I) {
- if (DemandedElts[FirstElt + I])
- InsertMask[FirstElt + I] = I;
- }
-
// FIXME: If the call has a convergence bundle, we end up leaving the dead
// call behind.
- return IC.Builder.CreateShuffleVector(NewCall, InsertMask);
+ return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), NewCall,
+ FirstElt);
}
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index ec645a7ff4519..e9d3b5e963b35 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -100,9 +100,8 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -113,9 +112,7 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
@@ -261,8 +258,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -273,8 +270,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -285,9 +282,7 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -326,9 +321,8 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -371,10 +365,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[VEC1:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -413,9 +404,7 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -439,9 +428,7 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/130276
More information about the llvm-commits
mailing list