[PATCH] D149348: RFD: Do not CSE convergent calls in different basic blocks
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 27 08:20:34 PDT 2023
foad updated this revision to Diff 517570.
foad added a comment.
Move the logic from Instruction::hasSameSpecialState to
Instruction::isIdenticalToWhenDefined. @nikic, is that more acceptable?
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D149348/new/
https://reviews.llvm.org/D149348
Files:
llvm/lib/IR/Instruction.cpp
llvm/lib/Transforms/Scalar/EarlyCSE.cpp
llvm/test/CodeGen/AMDGPU/cse-convergent.ll
llvm/test/Transforms/SimplifyCFG/convergent.ll
Index: llvm/test/Transforms/SimplifyCFG/convergent.ll
===================================================================
--- llvm/test/Transforms/SimplifyCFG/convergent.ll
+++ llvm/test/Transforms/SimplifyCFG/convergent.ll
@@ -82,6 +82,8 @@
; SINK-NEXT: [[TMP0:%.*]] = tail call i32 @tid()
; SINK-NEXT: [[REM:%.*]] = and i32 [[TMP0]], 1
; SINK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[REM]], 0
+; SINK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP0]] to i64
+; SINK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[Y_COERCE:%.*]], i64 [[IDXPROM4]]
; SINK-NEXT: br i1 [[CMP_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; SINK: if.then:
; SINK-NEXT: [[TMP1:%.*]] = tail call i32 @mbcnt(i32 -1, i32 0)
@@ -101,8 +103,6 @@
; SINK-NEXT: br label [[IF_END]]
; SINK: if.end:
; SINK-NEXT: [[DOTSINK:%.*]] = phi i32 [ [[TMP6]], [[IF_ELSE]] ], [ [[TMP3]], [[IF_THEN]] ]
-; SINK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP0]] to i64
-; SINK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[Y_COERCE:%.*]], i64 [[IDXPROM4]]
; SINK-NEXT: store i32 [[DOTSINK]], ptr [[ARRAYIDX5]], align 4
; SINK-NEXT: ret void
;
Index: llvm/test/CodeGen/AMDGPU/cse-convergent.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/cse-convergent.ll
+++ llvm/test/CodeGen/AMDGPU/cse-convergent.ll
@@ -21,15 +21,25 @@
; GCN-NEXT: s_or_saveexec_b32 s4, -1
; GCN-NEXT: v_mov_b32_dpp v2, v3 row_xmask:1 row_mask:0xf bank_mask:0xf
; GCN-NEXT: s_mov_b32 exec_lo, s4
-; GCN-NEXT: v_mov_b32_e32 v4, 0
-; GCN-NEXT: v_mov_b32_e32 v0, v2
+; GCN-NEXT: v_mov_b32_e32 v5, 0
+; GCN-NEXT: v_mov_b32_e32 v4, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GCN-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v4, v0
+; GCN-NEXT: s_or_saveexec_b32 s5, -1
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_mov_b32 exec_lo, s5
+; GCN-NEXT: v_mov_b32_e32 v3, v0
+; GCN-NEXT: s_not_b32 exec_lo, exec_lo
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: s_not_b32 exec_lo, exec_lo
+; GCN-NEXT: s_or_saveexec_b32 s5, -1
+; GCN-NEXT: v_mov_b32_dpp v2, v3 row_xmask:1 row_mask:0xf bank_mask:0xf
+; GCN-NEXT: s_mov_b32 exec_lo, s5
+; GCN-NEXT: v_mov_b32_e32 v5, v2
; GCN-NEXT: ; %bb.2: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v4
+; GCN-NEXT: v_add_nc_u32_e32 v0, v4, v5
; GCN-NEXT: s_xor_saveexec_b32 s4, -1
; GCN-NEXT: s_clause 0x1
; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32
Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -318,6 +318,16 @@
return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
GCR->getBasePtr(), GCR->getDerivedPtr());
+ if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
+ // Don't CSE convergent calls in different basic blocks, because they
+ // implicitly depend on the set of threads that is currently executing.
+ if (CI->isConvergent()) {
+ return hash_combine(
+ Inst->getOpcode(), Inst->getParent(),
+ hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+ }
+ }
+
// Mix in the opcode.
return hash_combine(
Inst->getOpcode(),
Index: llvm/lib/IR/Instruction.cpp
===================================================================
--- llvm/lib/IR/Instruction.cpp
+++ llvm/lib/IR/Instruction.cpp
@@ -571,6 +571,14 @@
getType() != I->getType())
return false;
+ if (const CallInst *CI = dyn_cast<CallInst>(this)) {
+ // Convergent calls implicitly depend on the set of threads that is
+ // currently executing, so conservatively return false if they are in
+ // different basic blocks.
+ if (CI->isConvergent() && CI->getParent() != I->getParent())
+ return false;
+ }
+
// If both instructions have no operands, they are identical.
if (getNumOperands() == 0 && I->getNumOperands() == 0)
return this->hasSameSpecialState(I);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D149348.517570.patch
Type: text/x-patch
Size: 4198 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230427/582d1fa6/attachment.bin>
More information about the llvm-commits
mailing list