[llvm] 01cea7a - [InstCombine] extractvalue (any_mul_with_overflow X, 2^n), 0 -> X << n
Chenbing Zheng via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 7 20:15:16 PDT 2022
Author: Chenbing Zheng
Date: 2022-09-08T11:12:55+08:00
New Revision: 01cea7ac1005a4e31c5328a96ddc6ff39c7df936
URL: https://github.com/llvm/llvm-project/commit/01cea7ac1005a4e31c5328a96ddc6ff39c7df936
DIFF: https://github.com/llvm/llvm-project/commit/01cea7ac1005a4e31c5328a96ddc6ff39c7df936.diff
LOG: [InstCombine] extractvalue (any_mul_with_overflow X, 2^n), 0 -> X << n
Alive2: https://alive2.llvm.org/ce/z/JLmabt (umul)
https://alive2.llvm.org/ce/z/J_ruXR (smul)
https://alive2.llvm.org/ce/z/o9SVSz (vector)
Reviewed By: spatel, RKSimon
Differential Revision: https://reviews.llvm.org/D133188
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/with_overflow.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index dc6943dab645..fc9c940731f2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3280,15 +3280,20 @@ InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
if (!WO)
return nullptr;
- // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
Intrinsic::ID OvID = WO->getIntrinsicID();
const APInt *C = nullptr;
if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
- if (*EV.idx_begin() == 0 &&
- (OvID == Intrinsic::smul_with_overflow ||
- OvID == Intrinsic::umul_with_overflow) &&
- C->isAllOnes()) {
- return BinaryOperator::CreateNeg(WO->getArgOperand(0));
+ if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
+ OvID == Intrinsic::umul_with_overflow)) {
+ // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
+ if (C->isAllOnes())
+ return BinaryOperator::CreateNeg(WO->getLHS());
+ // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
+ if (C->isPowerOf2()) {
+ return BinaryOperator::CreateShl(
+ WO->getLHS(),
+ ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
+ }
}
}
diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll
index e6488d303e28..6098e036bd0d 100644
--- a/llvm/test/Transforms/InstCombine/with_overflow.ll
+++ b/llvm/test/Transforms/InstCombine/with_overflow.ll
@@ -254,11 +254,10 @@ define i32 @umultest3(i32 %n) nounwind {
define i32 @umultest4(i32 %n) nounwind {
; CHECK-LABEL: @umultest4(
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[N:%.*]], 1
-; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[SHR]], i32 4)
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; CHECK-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; CHECK-NEXT: [[RET:%.*]] = select i1 [[OV]], i32 -1, i32 [[RES]]
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N:%.*]], 1
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[TMP1]], -4
+; CHECK-NEXT: [[OV_INV:%.*]] = icmp sgt i32 [[N]], -1
+; CHECK-NEXT: [[RET:%.*]] = select i1 [[OV_INV]], i32 [[RES]], i32 -1
; CHECK-NEXT: ret i32 [[RET]]
;
%shr = lshr i32 %n, 1
@@ -890,13 +889,10 @@ define i8 @umul_neg1_select(i8 %x) {
ret i8 %r
}
-; TODO: this could be 'shl' and 'icmp'
-
define i8 @umul_2(i8 %x, i1* %p) {
; CHECK-LABEL: @umul_2(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 2)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 1
+; CHECK-NEXT: [[OV:%.*]] = icmp slt i8 [[X]], 0
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -909,9 +905,8 @@ define i8 @umul_2(i8 %x, i1* %p) {
define i8 @umul_8(i8 %x, i1* %p) {
; CHECK-LABEL: @umul_8(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 8)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[X]], 31
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -924,9 +919,8 @@ define i8 @umul_8(i8 %x, i1* %p) {
define i8 @umul_64(i8 %x, i1* %p) {
; CHECK-LABEL: @umul_64(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 64)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 6
+; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[X]], 3
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -949,6 +943,22 @@ define i8 @umul_256(i8 %x, i1* %p) {
ret i8 %r
}
+define <4 x i8> @umul_4_vec_poison(<4 x i8> %x, <4 x i1>* %p) {
+; CHECK-LABEL: @umul_4_vec_poison(
+; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], <i8 63, i8 63, i8 63, i8 63>
+; CHECK-NEXT: store <4 x i1> [[OV]], <4 x i1>* [[P:%.*]], align 1
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %m = tail call { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> <i8 poison, i8 4, i8 4, i8 poison>)
+ %r = extractvalue { <4 x i8>, <4 x i1> } %m, 0
+ %ov = extractvalue { <4 x i8>, <4 x i1> } %m, 1
+ store <4 x i1> %ov, <4 x i1>* %p
+ ret <4 x i8> %r
+}
+
+; Negative test: not PowerOf2
+
define i8 @umul_3(i8 %x, i1* %p) {
; CHECK-LABEL: @umul_3(
; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 3)
@@ -966,9 +976,9 @@ define i8 @umul_3(i8 %x, i1* %p) {
define i8 @smul_4(i8 %x, i1* %p) {
; CHECK-LABEL: @smul_4(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 4)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -32
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[TMP1]], -64
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -981,9 +991,9 @@ define i8 @smul_4(i8 %x, i1* %p) {
define i8 @smul_16(i8 %x, i1* %p) {
; CHECK-LABEL: @smul_16(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 16)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -8
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[TMP1]], -16
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -996,9 +1006,9 @@ define i8 @smul_16(i8 %x, i1* %p) {
define i8 @smul_32(i8 %x, i1* %p) {
; CHECK-LABEL: @smul_32(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 32)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -4
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[TMP1]], -8
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -1011,9 +1021,8 @@ define i8 @smul_32(i8 %x, i1* %p) {
define i8 @smul_128(i8 %x, i1* %p) {
; CHECK-LABEL: @smul_128(
-; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 -128)
-; CHECK-NEXT: [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT: [[R:%.*]] = shl i8 [[X:%.*]], 7
+; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[X]], 1
; CHECK-NEXT: store i1 [[OV]], i1* [[P:%.*]], align 1
; CHECK-NEXT: ret i8 [[R]]
;
@@ -1024,6 +1033,23 @@ define i8 @smul_128(i8 %x, i1* %p) {
ret i8 %r
}
+define <4 x i8> @smul_2_vec_poison(<4 x i8> %x, <4 x i1>* %p) {
+; CHECK-LABEL: @smul_2_vec_poison(
+; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], <i8 64, i8 64, i8 64, i8 64>
+; CHECK-NEXT: [[OV:%.*]] = icmp slt <4 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: store <4 x i1> [[OV]], <4 x i1>* [[P:%.*]], align 1
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %m = tail call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> <i8 poison, i8 2, i8 2, i8 poison>)
+ %r = extractvalue { <4 x i8>, <4 x i1> } %m, 0
+ %ov = extractvalue { <4 x i8>, <4 x i1> } %m, 1
+ store <4 x i1> %ov, <4 x i1>* %p
+ ret <4 x i8> %r
+}
+
+; Negative test: not PowerOf2
+
define i8 @smul_7(i8 %x, i1* %p) {
; CHECK-LABEL: @smul_7(
; CHECK-NEXT: [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 7)
More information about the llvm-commits
mailing list