[PATCH] D153151: [EarlyCSE] Do not CSE convergent readnone calls

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 19 06:22:40 PDT 2023


foad updated this revision to Diff 532629.
foad added a comment.

Rethink.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153151/new/

https://reviews.llvm.org/D153151

Files:
  llvm/lib/Transforms/Scalar/EarlyCSE.cpp
  llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll


Index: llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
===================================================================
--- llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
+++ llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
@@ -10,9 +10,10 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[COND]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[Y1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR1]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
+; CHECK-NEXT:    [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
 ; CHECK-NEXT:    [[RET:%.*]] = add i32 [[X1]], [[Y2]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -218,6 +218,19 @@
   return true;
 }
 
+static unsigned hashCallInst(CallInst *CI) {
+  // Convergent calls implicitly depend on the set of threads that is currently
+  // executing, so mix the parent block into their hash to separate blocks.
+  if (CI->isConvergent()) {
+    return hash_combine(
+        CI->getOpcode(), CI->getParent(),
+        hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+  }
+  return hash_combine(
+      CI->getOpcode(),
+      hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+}
+
 static unsigned getHashValueImpl(SimpleValue Val) {
   Instruction *Inst = Val.Inst;
   // Hash in all of the operands as pointers.
@@ -320,11 +333,8 @@
 
   // Don't CSE convergent calls in different basic blocks, because they
   // implicitly depend on the set of threads that is currently executing.
-  if (CallInst *CI = dyn_cast<CallInst>(Inst); CI && CI->isConvergent()) {
-    return hash_combine(
-        Inst->getOpcode(), Inst->getParent(),
-        hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
-  }
+  if (CallInst *CI = dyn_cast<CallInst>(Inst))
+    return hashCallInst(CI);
 
   // Mix in the opcode.
   return hash_combine(
@@ -524,15 +534,21 @@
   Instruction *Inst = Val.Inst;
 
   // Hash all of the operands as pointers and mix in the opcode.
-  return hash_combine(
-      Inst->getOpcode(),
-      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+  return hashCallInst(cast<CallInst>(Inst));
 }
 
 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
-  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
   if (LHS.isSentinel() || RHS.isSentinel())
-    return LHSI == RHSI;
+    return LHS.Inst == RHS.Inst;
+
+  CallInst *LHSI = cast<CallInst>(LHS.Inst);
+  CallInst *RHSI = cast<CallInst>(RHS.Inst);
+
+  // Convergent calls implicitly depend on the set of threads that is
+  // currently executing, so conservatively return false if they are in
+  // different basic blocks.
+  if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
+    return false;

   return LHSI->isIdenticalTo(RHSI);
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D153151.532629.patch
Type: text/x-patch
Size: 3147 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230619/c07ff020/attachment.bin>


More information about the llvm-commits mailing list