[llvm] 35ec3ff - Disable Jump Threading for the targets with divergent control flow

Wed Dec 16 15:45:34 PST 2020

Author: alex-t
Date: 2020-12-17T02:40:54+03:00
New Revision: 35ec3ff76dee376661dd7e4971b80533a7a7f364

URL: https://github.com/llvm/llvm-project/commit/35ec3ff76dee376661dd7e4971b80533a7a7f364
DIFF: https://github.com/llvm/llvm-project/commit/35ec3ff76dee376661dd7e4971b80533a7a7f364.diff

LOG: Disable Jump Threading for the targets with divergent control flow

Details: Jump Threading does not make sense for targets with divergent CF
         since they do not use branch prediction for speculative execution.
         Also, in the high-level IR there is not enough information to conclude whether a branch is divergent or uniform.
         This may cause errors in further CF lowering.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D93302

Added: 
    llvm/test/Transforms/JumpThreading/divergent-target-test.ll

Modified: 
    llvm/lib/Transforms/Scalar/JumpThreading.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index a24dd8029f68..ce191144297b 100644

--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -153,6 +154,7 @@ namespace {
       AU.addPreserved<LazyValueInfoWrapperPass>();
       AU.addPreserved<GlobalsAAWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
+      AU.addRequired<TargetTransformInfoWrapperPass>();
     }
 
     void releaseMemory() override { Impl.releaseMemory(); }
@@ -311,6 +313,10 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
 bool JumpThreading::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
+  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  // Jump Threading makes no sense for targets with divergent CF.
+  if (TTI->hasBranchDivergence())
+    return false;
   auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();

diff  --git a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
new file mode 100644
index 000000000000..4f7d237691c8
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
@@ -0,0 +1,47 @@
+; REQUIRES: amdgpu-registered-target && x86-registered-target
+; RUN: opt < %s -mtriple=amdgcn -jump-threading -S | FileCheck %s  -check-prefixes=CHECK,DIVERGENT
+; RUN: opt < %s -mtriple=x86_64 -jump-threading -S | FileCheck %s  -check-prefixes=CHECK,UNIFORM
+
+; Here we ensure that for a target without branch divergence the usual Jump Threading optimization is performed.
+; For a target with branch divergence no optimization is done, so the IR is unchanged.
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+define i32 @test(i1 %cond) {
+; CHECK: test
+	br i1 %cond, label %T1, label %F1
+
+; DIVERGENT:   T1
+; UNIFORM-NOT: T1
+T1:
+	%v1 = call i32 @f1()
+	br label %Merge
+; DIVERGENT:   F1
+; UNIFORM-NOT: F1
+F1:
+	%v2 = call i32 @f2()
+	br label %Merge
+; DIVERGENT:   Merge
+; UNIFORM-NOT: Merge
+Merge:
+	%A = phi i1 [true, %T1], [false, %F1]
+	%B = phi i32 [%v1, %T1], [%v2, %F1]
+	br i1 %A, label %T2, label %F2
+
+; DIVERGENT:   T2
+T2:
+; UNIFORM: T2:
+; UNIFORM: %v1 = call i32 @f1()
+; UNIFORM: call void @f3()
+; UNIFORM: ret i32 %v1
+	call void @f3()
+	ret i32 %B
+; DIVERGENT:   F2
+F2:
+; UNIFORM: F2:
+; UNIFORM: %v2 = call i32 @f2()
+; UNIFORM: ret i32 %v2
+	ret i32 %B
+}