[llvm] 9ff7181 - [EarlyCSE] Do not CSE convergent calls with memory effects
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 14 03:47:55 PDT 2023
Author: Jay Foad
Date: 2023-07-14T11:43:41+01:00
New Revision: 9ff71814cb5d71e907feaa0b3165e866b882f9aa
URL: https://github.com/llvm/llvm-project/commit/9ff71814cb5d71e907feaa0b3165e866b882f9aa
DIFF: https://github.com/llvm/llvm-project/commit/9ff71814cb5d71e907feaa0b3165e866b882f9aa.diff
LOG: [EarlyCSE] Do not CSE convergent calls with memory effects
D149348 did this for readnone calls, which are handled by SimpleValue.
This patch does the same for all other CSEable calls, which are handled
by CallValue.
Differential Revision: https://reviews.llvm.org/D153151
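
For illustration, a minimal IR sketch of the situation this change guards against (assuming a hypothetical convergent, read-only callee @foo; the actual tests below use @llvm.read_register.i32 and @llvm.amdgcn.live.mask):

declare i32 @foo() convergent memory(read)

define i32 @example(i1 %c) {
entry:
  ; Executed by every thread that enters the function.
  %x = call i32 @foo()
  br i1 %c, label %if, label %end
if:
  ; Executed only by the threads that take the branch, so it may observe a
  ; different set of active threads. EarlyCSE must not replace this call
  ; with %x from the entry block, even though the two calls are identical.
  %y = call i32 @foo()
  br label %end
end:
  %p = phi i32 [ 0, %entry ], [ %y, %if ]
  %r = add i32 %x, %p
  ret i32 %r
}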
Added:
Modified:
llvm/lib/Transforms/Scalar/EarlyCSE.cpp
llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 6344c997bde5d5..67e8e82e408f64 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -218,6 +218,19 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
return true;
}
+static unsigned hashCallInst(CallInst *CI) {
+ // Don't CSE convergent calls in different basic blocks, because they
+ // implicitly depend on the set of threads that is currently executing.
+ if (CI->isConvergent()) {
+ return hash_combine(
+ CI->getOpcode(), CI->getParent(),
+ hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+ }
+ return hash_combine(
+ CI->getOpcode(),
+ hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+}
+
static unsigned getHashValueImpl(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
@@ -320,11 +333,8 @@ static unsigned getHashValueImpl(SimpleValue Val) {
// Don't CSE convergent calls in different basic blocks, because they
// implicitly depend on the set of threads that is currently executing.
- if (CallInst *CI = dyn_cast<CallInst>(Inst); CI && CI->isConvergent()) {
- return hash_combine(
- Inst->getOpcode(), Inst->getParent(),
- hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
- }
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return hashCallInst(CI);
// Mix in the opcode.
return hash_combine(
@@ -524,15 +534,21 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
// Hash all of the operands as pointers and mix in the opcode.
- return hash_combine(
- Inst->getOpcode(),
- hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+ return hashCallInst(cast<CallInst>(Inst));
}
bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
- Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
if (LHS.isSentinel() || RHS.isSentinel())
- return LHSI == RHSI;
+ return LHS.Inst == RHS.Inst;
+
+ CallInst *LHSI = cast<CallInst>(LHS.Inst);
+ CallInst *RHSI = cast<CallInst>(RHS.Inst);
+
+ // Convergent calls implicitly depend on the set of threads that is
+ // currently executing, so conservatively return false if they are in
+ // different basic blocks.
+ if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
+ return false;
return LHSI->isIdenticalTo(RHSI);
}
diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
index b439ed64a543cb..7ae17264a7c737 100644
--- a/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
+++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
@@ -11,9 +11,10 @@ define i32 @test_read_register(i32 %cond) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COND]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
+; CHECK-NEXT: [[Y1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR2]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
+; CHECK-NEXT: [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
; CHECK-NEXT: [[RET:%.*]] = add i32 [[X1]], [[Y2]]
; CHECK-NEXT: ret i32 [[RET]]
;
@@ -57,9 +58,10 @@ define i1 @test_live_mask(i32 %cond) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COND]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
; CHECK: if:
+; CHECK-NEXT: [[Y1:%.*]] = call i1 @llvm.amdgcn.live.mask() #[[ATTR2]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
+; CHECK-NEXT: [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
; CHECK-NEXT: [[RET:%.*]] = add i1 [[X1]], [[Y2]]
; CHECK-NEXT: ret i1 [[RET]]
;