[llvm-branch-commits] [llvm] e6f2a79 - [InstCombine] canonicalizeSaturatedAdd(): last fold is only valid for strict comparison (PR48390)
Roman Lebedev via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 9 07:24:15 PST 2020
Author: Roman Lebedev
Date: 2020-12-09T18:19:09+03:00
New Revision: e6f2a79d7aa01f8dd7f0194f97a50b480e8ede71
URL: https://github.com/llvm/llvm-project/commit/e6f2a79d7aa01f8dd7f0194f97a50b480e8ede71
DIFF: https://github.com/llvm/llvm-project/commit/e6f2a79d7aa01f8dd7f0194f97a50b480e8ede71.diff
LOG: [InstCombine] canonicalizeSaturatedAdd(): last fold is only valid for strict comparison (PR48390)
We could create uadd.sat under incorrect circumstances
if a select with -1 as the false value was canonicalized
by swapping the T/F values. Unlike the other transforms
in the same function, it is not invariant to equality.
Some alive proofs: https://alive2.llvm.org/ce/z/emmKKL
Based on original patch by David Green!
Fixes https://bugs.llvm.org/show_bug.cgi?id=48390
Differential Revision: https://reviews.llvm.org/D92717
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/saturating-add-sub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ea2a02a1bba7..397cb0b0e187 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -765,25 +765,24 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
// There are 8 commuted variants.
- // Canonicalize -1 (saturated result) to true value of the select. Just
- // swapping the compare operands is legal, because the selected value is the
- // same in case of equality, so we can interchange u< and u<=.
+ // Canonicalize -1 (saturated result) to true value of the select.
if (match(FVal, m_AllOnes())) {
std::swap(TVal, FVal);
- std::swap(Cmp0, Cmp1);
+ Pred = CmpInst::getInversePredicate(Pred);
}
if (!match(TVal, m_AllOnes()))
return nullptr;
- // Canonicalize predicate to 'ULT'.
- if (Pred == ICmpInst::ICMP_UGT) {
- Pred = ICmpInst::ICMP_ULT;
+ // Canonicalize predicate to less-than or less-or-equal-than.
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
std::swap(Cmp0, Cmp1);
+ Pred = CmpInst::getSwappedPredicate(Pred);
}
- if (Pred != ICmpInst::ICMP_ULT)
+ if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE)
return nullptr;
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ // Strictness of the comparison is irrelevant.
Value *Y;
if (match(Cmp0, m_Not(m_Value(X))) &&
match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
@@ -792,6 +791,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
}
// The 'not' op may be included in the sum but not the compare.
+ // Strictness of the comparison is irrelevant.
X = Cmp0;
Y = Cmp1;
if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
@@ -802,7 +802,9 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
}
// The overflow may be detected via the add wrapping round.
- if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
+ // This is only valid for strict comparison!
+ if (Pred == ICmpInst::ICMP_ULT &&
+ match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
// ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
// ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index c5504ccbb94a..b132e0bdb400 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1801,8 +1801,10 @@ define i32 @uadd_sat_via_add(i32 %x, i32 %y) {
define i32 @uadd_sat_via_add_nonstrict(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_via_add_nonstrict(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
;
%a = add i32 %x, %y
%c = icmp ule i32 %a, %y
@@ -1823,8 +1825,10 @@ define i32 @uadd_sat_via_add_swapped_select(i32 %x, i32 %y) {
define i32 @uadd_sat_via_add_swapped_select_strict(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_via_add_swapped_select_strict(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
;
%a = add i32 %x, %y
%c = icmp ugt i32 %a, %y
@@ -1845,8 +1849,10 @@ define i32 @uadd_sat_via_add_swapped_cmp(i32 %x, i32 %y) {
define i32 @uadd_sat_via_add_swapped_cmp_nonstrict(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_nonstrict(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
;
%a = add i32 %x, %y
%c = icmp uge i32 %y, %a
@@ -1867,8 +1873,10 @@ define i32 @uadd_sat_via_add_swapped_cmp_nonstric(i32 %x, i32 %y) {
define i32 @uadd_sat_via_add_swapped_cmp_select_nonstrict(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_select_nonstrict(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
;
%a = add i32 %x, %y
%c = icmp ult i32 %y, %a
More information about the llvm-branch-commits
mailing list