[llvm] aca5aeb - [InstCombine] Add freezeAllUsesOfArgument to visitFreeze
hyeongyu kim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 24 02:09:05 PDT 2021
Author: hyeongyu kim
Date: 2021-07-24T18:08:58+09:00
New Revision: aca5aeb7523d7868a4b0706330dcdfc58c0adaed
URL: https://github.com/llvm/llvm-project/commit/aca5aeb7523d7868a4b0706330dcdfc58c0adaed
DIFF: https://github.com/llvm/llvm-project/commit/aca5aeb7523d7868a4b0706330dcdfc58c0adaed.diff
LOG: [InstCombine] Add freezeAllUsesOfArgument to visitFreeze
In D106041, a freeze was added before the branch condition to solve the miscompilation problem of SimpleLoopUnswitch.
However, I found that the added freeze disturbed other optimizations in the following situations.
```
arg.fr = freeze(arg)
use(arg.fr)
...
use(arg)
```
The problem arises because arg and arg.fr are recognized as different values.
Therefore, using arg.fr instead of arg throughout the function eliminates the problem.
Thus, I add a function (freezeDominatedUses) to InstCombine's visitFreeze that replaces every use of arg dominated by the freeze with freeze(arg).
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D106233
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/freeze.ll
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8f3b5354820bc..eaa53348028d0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -169,6 +169,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *visitLandingPadInst(LandingPadInst &LI);
Instruction *visitVAEndInst(VAEndInst &I);
Value *pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI);
+ bool freezeDominatedUses(FreezeInst &FI);
Instruction *visitFreeze(FreezeInst &I);
/// Specify what to return for unhandled instructions.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 8e8d8a75f79ae..5bcbffabd760f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3605,6 +3605,22 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
return OrigOp;
}
+bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
+ Value *Op = FI.getOperand(0);
+
+ if (isa<Constant>(Op))
+ return false;
+
+ bool Changed = false;
+ Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
+ bool Dominates = DT.dominates(&FI, U);
+ Changed |= Dominates;
+ return Dominates;
+ });
+
+ return Changed;
+}
+
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
Value *Op0 = I.getOperand(0);
@@ -3648,6 +3664,10 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
return replaceInstUsesWith(I, BestValue);
}
+ // Replace all dominated uses of Op to freeze(Op).
+ if (freezeDominatedUses(I))
+ return &I;
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll
index be2d6146c054c..ac30036b13eaf 100644
--- a/llvm/test/Transforms/InstCombine/freeze.ll
+++ b/llvm/test/Transforms/InstCombine/freeze.ll
@@ -135,3 +135,89 @@ define i32 @early_freeze_test3(i32 %v1) {
%v4.fr = freeze i32 %v4
ret i32 %v4.fr
}
+
+; Replace all dominated uses of v with freeze(v).
+
+define void @freeze_dominated_uses_test1(i32 %v) {
+; CHECK-LABEL: @freeze_dominated_uses_test1(
+; CHECK-NEXT: [[V_FR:%.*]] = freeze i32 [[V:%.*]]
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR]])
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR]])
+; CHECK-NEXT: ret void
+;
+ %v.fr = freeze i32 %v
+ call void @use_i32(i32 %v)
+ call void @use_i32(i32 %v.fr)
+ ret void
+}
+
+define void @freeze_dominated_uses_test2(i32 %v) {
+; CHECK-LABEL: @freeze_dominated_uses_test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use_i32(i32 [[V:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: [[V_FR:%.*]] = freeze i32 [[V]]
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR]])
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR]])
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @use_i32(i32 [[V]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @use_i32(i32 %v)
+ %cond = icmp eq i32 %v, 0
+ br i1 %cond, label %bb0, label %bb1
+
+bb0:
+ %v.fr = freeze i32 %v
+ call void @use_i32(i32 %v.fr)
+ call void @use_i32(i32 %v)
+ br label %end
+
+bb1:
+ call void @use_i32(i32 %v)
+ br label %end
+
+end:
+ ret void
+}
+
+; If there is a duplicate freeze, it will be removed.
+
+define void @freeze_dominated_uses_test3(i32 %v, i1 %cond) {
+; CHECK-LABEL: @freeze_dominated_uses_test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V_FR1:%.*]] = freeze i32 [[V:%.*]]
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR1]])
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR1]])
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @use_i32(i32 [[V_FR1]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %v.fr1 = freeze i32 %v
+ call void @use_i32(i32 %v.fr1)
+ br i1 %cond, label %bb0, label %bb1
+
+bb0:
+ %v.fr2 = freeze i32 %v
+ call void @use_i32(i32 %v.fr2)
+ br label %end
+
+bb1:
+ call void @use_i32(i32 %v)
+ br label %end
+
+end:
+ ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
index d0b9a9f40c8cb..e0782710370c6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -13,12 +13,11 @@ define float @test_merge_allof_v4sf(<4 x float> %t) {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]]
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: br label [[RETURN]]
@@ -182,12 +181,11 @@ define float @test_separate_allof_v4sf(<4 x float> %t) {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]]
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: br label [[RETURN]]
@@ -357,14 +355,13 @@ define float @test_merge_allof_v4si(<4 x i32> %t) {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
; CHECK-NEXT: br i1 [[TMP5]], label [[RETURN]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
; CHECK-NEXT: br label [[RETURN]]
@@ -515,12 +512,11 @@ define i32 @test_separate_allof_v4si(<4 x i32> %t) {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], i32 0, i32 [[ADD]]
; CHECK-NEXT: br label [[RETURN]]
@@ -594,12 +590,11 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) {
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[RETURN:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT7]], i32 [[ADD]], i32 0
; CHECK-NEXT: br label [[RETURN]]
More information about the llvm-commits
mailing list