[llvm] d410f09 - AMDGPU: Simplify demanded vector elts of readfirstlane sources (#128646)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 22:01:14 PST 2025


Author: Matt Arsenault
Date: 2025-02-28T13:01:10+07:00
New Revision: d410f093da7b9e3cd245dac62682ec1acd29d117

URL: https://github.com/llvm/llvm-project/commit/d410f093da7b9e3cd245dac62682ec1acd29d117
DIFF: https://github.com/llvm/llvm-project/commit/d410f093da7b9e3cd245dac62682ec1acd29d117.diff

LOG: AMDGPU: Simplify demanded vector elts of readfirstlane sources (#128646)

Stub implementation of simplifyDemandedVectorEltsIntrinsic for
readfirstlane.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
    llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index cb918e16f0f3b..d69cfbbe4088e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1569,6 +1569,10 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
     std::function<void(Instruction *, unsigned, APInt, APInt &)>
         SimplifyAndSetOp) const {
   switch (II.getIntrinsicID()) {
+  case Intrinsic::amdgcn_readfirstlane:
+    // TODO: For a vector extract, should reduce the intrinsic call type.
+    SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+    return std::nullopt;
   case Intrinsic::amdgcn_raw_buffer_load:
   case Intrinsic::amdgcn_raw_ptr_buffer_load:
   case Intrinsic::amdgcn_raw_buffer_load_format:

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index 83d9d0d032ed1..836c739048411 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -306,10 +306,9 @@ define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
 define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 1
 ; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
   %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
@@ -338,11 +337,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
 define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+; CHECK-NEXT:    ret <2 x i32> poison
 ;
   %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
   %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2


        


More information about the llvm-commits mailing list