[llvm] r363587 - AMDGPU: Fold readlane/readfirstlane calls
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 10:52:35 PDT 2019
Author: arsenm
Date: Mon Jun 17 10:52:35 2019
New Revision: 363587
URL: http://llvm.org/viewvc/llvm-project?rev=363587&view=rev
Log:
AMDGPU: Fold readlane/readfirstlane calls
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=363587&r1=363586&r2=363587&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Jun 17 10:52:35 2019
@@ -3781,6 +3781,30 @@ Instruction *InstCombiner::visitCallInst
// A constant value is trivially uniform.
if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
return replaceInstUsesWith(*II, C);
+
+ // The rest of these may not be safe if the exec may not be the same between
+ // the def and use.
+ Value *Src = II->getArgOperand(0);
+ Instruction *SrcInst = dyn_cast<Instruction>(Src);
+ if (SrcInst && SrcInst->getParent() != II->getParent())
+ break;
+
+ // readfirstlane (readfirstlane x) -> readfirstlane x
+ // readlane (readfirstlane x), y -> readfirstlane x
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+ return replaceInstUsesWith(*II, Src);
+
+ if (IID == Intrinsic::amdgcn_readfirstlane) {
+ // readfirstlane (readlane x, y) -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+ return replaceInstUsesWith(*II, Src);
+ } else {
+ // readlane (readlane x, y), y -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+ m_Value(), m_Specific(II->getArgOperand(1)))))
+ return replaceInstUsesWith(*II, Src);
+ }
+
break;
}
case Intrinsic::stackrestore: {
Modified: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll?rev=363587&r1=363586&r2=363587&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll Mon Jun 17 10:52:35 2019
@@ -2462,6 +2462,63 @@ define amdgpu_kernel void @readfirstlane
ret void
}
+define i32 @readfirstlane_idempotent(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_idempotent(
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT: ret i32 [[READ0]]
+;
+ %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+ %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
+ ret i32 %read2
+}
+
+define i32 @readfirstlane_readlane(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane(
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT: ret i32 [[READ0]]
+;
+ %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+ ret i32 %read1
+}
+
+define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT: ret i32 [[READ1]]
+;
+bb0:
+ %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ br label %bb1
+
+bb1:
+ %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+ ret i32 %read1
+}
+
+define i32 @readfirstlane_readlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane_different_block(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT: ret i32 [[READ1]]
+;
+bb0:
+ %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
+ br label %bb1
+
+bb1:
+ %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+ ret i32 %read1
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.readlane
; --------------------------------------------------------------------
@@ -2491,6 +2548,74 @@ define amdgpu_kernel void @readlane_cons
ret void
}
+define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent(
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT: ret i32 [[READ0]]
+;
+ %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+ ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
+; CHECK-LABEL: @readlane_idempotent_different_lanes(
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
+; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
+; CHECK-NEXT: ret i32 [[READ1]]
+;
+ %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
+ ret i32 %read1
+}
+
+define i32 @readlane_readfirstlane(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane(
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT: ret i32 [[READ0]]
+;
+ %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+ ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent_different_block(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
+; CHECK-NEXT: ret i32 [[READ1]]
+;
+bb0:
+ %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+ br label %bb1
+
+bb1:
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+ ret i32 %read1
+}
+
+
+define i32 @readlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane_different_block(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
+; CHECK-NEXT: ret i32 [[READ1]]
+;
+bb0:
+ %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ br label %bb1
+
+bb1:
+ %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+ ret i32 %read1
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.update.dpp.i32
; --------------------------------------------------------------------
More information about the llvm-commits mailing list