[llvm] AMDGPU: Simplify demanded vector elts of readfirstlane sources (PR #128646)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 21:57:35 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/128646

>From e32caff738d765d4b9c989390542efa8710fea4d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 25 Feb 2025 12:16:21 +0700
Subject: [PATCH] AMDGPU: Simplify demanded vector elts of readfirstlane
 sources

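Add a stub implementation of simplifyDemandedVectorEltsIntrinsic for
readfirstlane that simplifies the demanded vector elements of the
intrinsic's source operand. Shrinking the intrinsic call type itself
for a vector extract is left as a TODO.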
---
 llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp |  4 ++++
 .../simplify-demanded-vector-elts-lane-intrinsics.ll  | 11 +++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index cb918e16f0f3b..d69cfbbe4088e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1569,6 +1569,10 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
     std::function<void(Instruction *, unsigned, APInt, APInt &)>
         SimplifyAndSetOp) const {
   switch (II.getIntrinsicID()) {
+  case Intrinsic::amdgcn_readfirstlane:
+    // TODO: For a vector extract, should reduce the intrinsic call type.
+    SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+    return std::nullopt;
   case Intrinsic::amdgcn_raw_buffer_load:
   case Intrinsic::amdgcn_raw_ptr_buffer_load:
   case Intrinsic::amdgcn_raw_buffer_load_format:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index 83d9d0d032ed1..836c739048411 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -306,10 +306,9 @@ define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
 define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 1
 ; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
   %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
@@ -338,11 +337,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
 define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+; CHECK-NEXT:    ret <2 x i32> poison
 ;
   %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
   %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2



More information about the llvm-commits mailing list