[llvm] AMDGPU: Reduce intrinsics for single demanded vector element (PR #141810)
Maksim Levental via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 10:42:43 PDT 2025
https://github.com/makslevental updated https://github.com/llvm/llvm-project/pull/141810
From f3c5df8f35cbb83dca4e9706d7c3f8dcb20f42db Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental at gmail.com>
Date: Wed, 28 May 2025 13:39:28 -0400
Subject: [PATCH] AMDGPU: Reduce readlane for single demanded vector element
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 6 +++-
...fy-demanded-vector-elts-lane-intrinsics.ll | 36 +++++++++++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5f6ab24182d5e..50f5816a27d2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1837,7 +1837,10 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
// TODO: Preserve callsite attributes?
- CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
+ SmallVector<Value *> Args{Extract};
+ if (II.arg_size() > 1)
+ Args.push_back(II.getArgOperand(1));
+ CallInst *NewCall = IC.Builder.CreateCall(Remangled, Args, OpBundles);
return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
NewCall, FirstElt);
@@ -1872,6 +1875,7 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
SimplifyAndSetOp) const {
switch (II.getIntrinsicID()) {
case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane:
SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
return simplifyAMDGCNLaneIntrinsicDemanded(IC, II, DemandedElts, UndefElts);
case Intrinsic::amdgcn_raw_buffer_load:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index 056caabb6d60a..8c773dd53ffdf 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -25,6 +25,42 @@ define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) {
ret i16 %elt
}
+define half @extract_elt0_v2f16_readlane_imm_0(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readlane_imm_0(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[SRC]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.readlane.f16(half [[TMP1]], i32 0)
+; CHECK-NEXT: ret half [[TMP2]]
+;
+ %x = call <2 x half> @llvm.amdgcn.readlane.v2f16(<2 x half> %src, i32 0)
+ %elt = extractelement <2 x half> %x, i32 0
+ ret half %elt
+}
+
+define half @extract_elt0_v2f16_readlane_imm_1(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readlane_imm_1(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[SRC]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.readlane.f16(half [[TMP1]], i32 1)
+; CHECK-NEXT: ret half [[TMP2]]
+;
+ %x = call <2 x half> @llvm.amdgcn.readlane.v2f16(<2 x half> %src, i32 1)
+ %elt = extractelement <2 x half> %x, i32 1
+ ret half %elt
+}
+
+define half @extract_elt0_v2f16_readlane(<2 x half> %src, i32 %idx) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[X:%.*]] = call <2 x half> @llvm.amdgcn.readlane.v2f16(<2 x half> [[SRC]], i32 [[IDX]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[X]], i32 [[IDX]]
+; CHECK-NEXT: ret half [[ELT]]
+;
+ %x = call <2 x half> @llvm.amdgcn.readlane.v2f16(<2 x half> %src, i32 %idx)
+ %elt = extractelement <2 x half> %x, i32 %idx
+ ret half %elt
+}
+
define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) {
; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane(
; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] {
More information about the llvm-commits mailing list