[llvm] 8d48766 - [CVP] Soften SDiv into a UDiv as long as we know domains of both of the operands.
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 18 08:00:25 PDT 2020
Author: Roman Lebedev
Date: 2020-07-18T17:59:56+03:00
New Revision: 8d487668d09fb0e4e54f36207f07c1480ffabbfd
URL: https://github.com/llvm/llvm-project/commit/8d487668d09fb0e4e54f36207f07c1480ffabbfd
DIFF: https://github.com/llvm/llvm-project/commit/8d487668d09fb0e4e54f36207f07c1480ffabbfd.diff
LOG: [CVP] Soften SDiv into a UDiv as long as we know domains of both of the operands.
Yes, if the operands are non-positive, this comes at the extra cost
of two extra negations. But a. division is already just
ridiculously costly, two more subtractions can't hurt much :)
and b. we have better/more analyses/folds for an unsigned division;
we could end up narrowing its bitwidth, converting it to lshr, etc.
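For instance (a hypothetical C++ illustration, not taken from this
commit), division by a power of two shows the difference: the unsigned
form folds to a single logical shift, while the signed form needs extra
fixup instructions for negative inputs.

// Illustrative only: why the unsigned form is the better canonical form.
unsigned udiv8(unsigned x) { return x / 8; } // folds to a single lshr
int sdiv8(int x) { return x / 8; }           // needs a sign fixup before the shift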
This is essentially take two on 0fdcca07ad2c0bdc2cdd40ba638109926f4f513b,
which didn't fix the potential regression I was seeing,
because ValueTracking's computeKnownBits() doesn't make use
of dominating conditions in its analysis.
While I could teach it to do that, this seems like the more general fix.
This big hammer actually does catch said potential regression.
Over vanilla test-suite + RawSpeed + darktable
(10M IR instrs, 1M IR BBs, 1M X86 asm instrs), this fires/converts 5 more
(+2%) SDivs. The total instruction count at the end of the middle-end
pipeline is only +6, so of the +10 extra negations roughly half are folded
away, and the asm instruction count is only +1, so practically speaking
all the extra negations are folded away and are therefore free.
Sadly, all these new UDivs remained; none folded away.
But there are two fewer basic blocks.
https://rise4fun.com/Alive/VS6

Name: v0
Pre: C0 >= 0 && C1 >= 0
%r = sdiv i8 C0, C1
=>
%r = udiv i8 C0, C1

Name: v1
Pre: C0 <= 0 && C1 >= 0
%r = sdiv i8 C0, C1
=>
%t0 = udiv i8 -C0, C1
%r = sub i8 0, %t0

Name: v2
Pre: C0 >= 0 && C1 <= 0
%r = sdiv i8 C0, C1
=>
%t0 = udiv i8 C0, -C1
%r = sub i8 0, %t0

Name: v3
Pre: C0 <= 0 && C1 <= 0
%r = sdiv i8 C0, C1
=>
%r = udiv i8 -C0, -C1
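For concreteness, here is how the four cases above compose, as a plain
C++ scalar model (a sketch; sdivViaUdiv is an illustrative stand-in, and
its runtime sign tests model the compile-time domain facts that
LazyValueInfo proves in the pass):

#include <cassert>
#include <cstdint>

// Compute an i8 sdiv via udiv plus negations, mirroring v0-v3 above.
int8_t sdivViaUdiv(int8_t a, int8_t b) {
  bool NegA = a < 0, NegB = b < 0;              // the operand "domains"
  uint8_t UA = NegA ? uint8_t(-a) : uint8_t(a); // negate into non-negative
  uint8_t UB = NegB ? uint8_t(-b) : uint8_t(b);
  uint8_t Q = UA / UB;                          // the udiv
  return NegA == NegB ? int8_t(Q) : int8_t(-Q); // negate back if domains differ
}

int main() {
  assert(sdivViaUdiv(10, 3) == 3);   // v0
  assert(sdivViaUdiv(-10, 3) == -3); // v1
  assert(sdivViaUdiv(10, -3) == -3); // v2
  assert(sdivViaUdiv(-10, -3) == 3); // v3
}

Note how this mirrors the cost accounting above: at most two operand
negations, plus one result negation when the domains differ.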
Added:
Modified:
llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 284312eaf822..fb7a005708e5 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -607,6 +607,12 @@ static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
return Result == LazyValueInfo::True;
}
+static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ Constant *Zero = ConstantInt::get(V->getType(), 0);
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI);
+ return Result == LazyValueInfo::True;
+}
+
static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) {
return all_of(SDI->operands(),
[&](Value *Op) { return isNonNegative(Op, LVI, SDI); });
@@ -672,24 +678,65 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
}
/// See if LazyValueInfo's ability to exploit edge conditions or range
-/// information is sufficient to prove the both operands of this SDiv are
-/// positive. If this is the case, replace the SDiv with a UDiv. Even for local
+/// information is sufficient to prove the signs of both operands of this SDiv.
+/// If this is the case, replace the SDiv with a UDiv. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
- if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI))
+ if (SDI->getType()->isVectorTy())
return false;
+ enum class Domain { NonNegative, NonPositive, Unknown };
+ auto getDomain = [&](Value *V) {
+ if (isNonNegative(V, LVI, SDI))
+ return Domain::NonNegative;
+ if (isNonPositive(V, LVI, SDI))
+ return Domain::NonPositive;
+ return Domain::Unknown;
+ };
+
+ struct Operand {
+ Value *V;
+ Domain Domain;
+ };
+ std::array<Operand, 2> Ops;
+ for (const auto &I : zip(Ops, SDI->operands())) {
+ Operand &Op = std::get<0>(I);
+ Op.V = std::get<1>(I);
+ Op.Domain = getDomain(Op.V);
+ if (Op.Domain == Domain::Unknown)
+ return false;
+ }
+
+ // We know domains of both of the operands!
++NumSDivs;
- auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
- SDI->getName(), SDI);
- BO->setDebugLoc(SDI->getDebugLoc());
- BO->setIsExact(SDI->isExact());
- SDI->replaceAllUsesWith(BO);
+
+ // We need operands to be non-negative, so negate each one that isn't.
+ for (Operand &Op : Ops) {
+ if (Op.Domain == Domain::NonNegative)
+ continue;
+ auto *BO =
+ BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
+ Op.V = BO;
+ }
+
+ auto *UDiv =
+ BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ UDiv->setDebugLoc(SDI->getDebugLoc());
+ UDiv->setIsExact(SDI->isExact());
+
+ Value *Res = UDiv;
+
+ // If the operands had two different domains, we need to negate the result.
+ if (Ops[0].Domain != Ops[1].Domain)
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+ SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
// Try to simplify our new udiv.
- processUDivOrURem(BO, LVI);
+ processUDivOrURem(UDiv, LVI);
return true;
}
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
index ec5de0010a14..8dfa09d47792 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll
@@ -177,8 +177,10 @@ define i32 @test7_pos_neg(i32 %x, i32 %y) {
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT: ret i32 [[DIV]]
+; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]]
+; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X]], [[Y_NONNEG]]
+; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]]
+; CHECK-NEXT: ret i32 [[DIV1_NEG]]
;
%c0 = icmp sge i32 %x, 0
call void @llvm.assume(i1 %c0)
@@ -194,8 +196,10 @@ define i32 @test8_neg_pos(i32 %x, i32 %y) {
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT: ret i32 [[DIV]]
+; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]]
+; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y]]
+; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]]
+; CHECK-NEXT: ret i32 [[DIV1_NEG]]
;
%c0 = icmp sle i32 %x, 0
call void @llvm.assume(i1 %c0)
@@ -211,8 +215,10 @@ define i32 @test9_neg_neg(i32 %x, i32 %y) {
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
-; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]]
-; CHECK-NEXT: ret i32 [[DIV]]
+; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]]
+; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]]
+; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y_NONNEG]]
+; CHECK-NEXT: ret i32 [[DIV1]]
;
%c0 = icmp sle i32 %x, 0
call void @llvm.assume(i1 %c0)