[llvm] r357012 - [InstCombine] form uaddsat from add+umin (PR14613)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 10:50:08 PDT 2019
Author: spatel
Date: Tue Mar 26 10:50:08 2019
New Revision: 357012
URL: http://llvm.org/viewvc/llvm-project?rev=357012&view=rev
Log:
[InstCombine] form uaddsat from add+umin (PR14613)
This is the last step towards solving the examples shown in:
https://bugs.llvm.org/show_bug.cgi?id=14613
With this change, x86 should end up with psubus instructions
when those are available.
All known codegen issues with expanding the saturating intrinsics
were resolved with:
D59006 / rL356855
We also have some early evidence in D58872 that using the intrinsics
will lead to better perf. If some target regresses from this, custom
lowering of the intrinsics (as in the above for x86) may be needed.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll
llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp?rev=357012&r1=357011&r2=357012&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp Tue Mar 26 10:50:08 2019
@@ -1064,6 +1064,28 @@ static Instruction *canonicalizeLowbitMa
return BinaryOperator::CreateNot(NotMask, I.getName());
}
+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+ Type *Ty = I.getType();
+ auto getUAddSat = [&]() {
+ return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+ };
+
+ // add (umin X, ~Y), Y --> uaddsat X, Y
+ Value *X, *Y;
+ if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+ m_Deferred(Y))))
+ return CallInst::Create(getUAddSat(), { X, Y });
+
+ // add (umin X, ~C), C --> uaddsat X, C
+ const APInt *C, *NotC;
+ if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+ *C == ~*NotC)
+ return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1266,6 +1288,9 @@ Instruction *InstCombiner::visitAdd(Bina
if (Instruction *V = canonicalizeLowbitMask(I, Builder))
return V;
+ if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+ return SatAdd;
+
return Changed ? &I : nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll?rev=357012&r1=357011&r2=357012&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll Tue Mar 26 10:50:08 2019
@@ -1134,9 +1134,7 @@ define <2 x i33> @add_umax_vec(<2 x i33>
define i8 @PR14613_umin(i8 %x) {
; CHECK-LABEL: @PR14613_umin(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16
-; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
-; CHECK-NEXT: [[U7:%.*]] = add i8 [[TMP2]], 15
+; CHECK-NEXT: [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15)
; CHECK-NEXT: ret i8 [[U7]]
;
%u4 = zext i8 %x to i32
Modified: llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll?rev=357012&r1=357011&r2=357012&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll Tue Mar 26 10:50:08 2019
@@ -1254,10 +1254,7 @@ declare <2 x i8> @get_v2i8()
define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
; CHECK-LABEL: @unsigned_sat_variable_using_min_add(
; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
; CHECK-NEXT: ret i32 [[R]]
;
%y = call i32 @get_i32() ; thwart complexity-based canonicalization
@@ -1271,10 +1268,7 @@ define i32 @unsigned_sat_variable_using_
define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add(
; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
; CHECK-NEXT: ret i32 [[R]]
;
%y = call i32 @get_i32() ; thwart complexity-based canonicalization
@@ -1288,10 +1282,7 @@ define i32 @unsigned_sat_variable_using_
define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_select(
; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[S]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@@ -1305,10 +1296,7 @@ define <2 x i8> @unsigned_sat_variable_u
define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) {
; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add_select(
; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y]], [[S]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@@ -1319,13 +1307,49 @@ define <2 x i8> @unsigned_sat_variable_u
ret <2 x i8> %r
}
+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_min(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp slt i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %y, %s
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_value(i32 %x, i32 %z) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp ult i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %z, %s
+ ret i32 %r
+}
+
; If we have a constant operand, there's no commutativity variation.
define i32 @unsigned_sat_constant_using_min(i32 %x) {
; CHECK-LABEL: @unsigned_sat_constant_using_min(
-; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
-; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
-; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -43
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 -43)
; CHECK-NEXT: ret i32 [[R]]
;
%c = icmp ult i32 %x, 42
@@ -1334,3 +1358,28 @@ define i32 @unsigned_sat_constant_using_
ret i32 %r
}
+define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_splat(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>)
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %c = icmp ult <2 x i32> %x, <i32 14, i32 14>
+ %s = select <2 x i1> %c, <2 x i32> %x, <2 x i32> <i32 14, i32 14>
+ %r = add <2 x i32> %s, <i32 -15, i32 -15>
+ ret <2 x i32> %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %x, 42
+ %s = select i1 %c, i32 %x, i32 42
+ %r = add i32 %s, -42
+ ret i32 %r
+}
More information about the llvm-commits mailing list