[llvm] 54d8ebb - [AggressiveInstCombine] Add `udiv` and `urem` instrs to TruncInstCombine DAG
Anton Afanasyev via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 10 10:30:56 PDT 2021
Author: Anton Afanasyev
Date: 2021-09-10T20:29:08+03:00
New Revision: 54d8ebbbfdb348a6d0354fe7909d9ab146dada5b
URL: https://github.com/llvm/llvm-project/commit/54d8ebbbfdb348a6d0354fe7909d9ab146dada5b
DIFF: https://github.com/llvm/llvm-project/commit/54d8ebbbfdb348a6d0354fe7909d9ab146dada5b.diff
LOG: [AggressiveInstCombine] Add `udiv` and `urem` instrs to TruncInstCombine DAG
Add `udiv` and `urem` instructions to the DAG post-dominated by `trunc`,
allowing TruncInstCombine to reduce bitwidth of expressions containing these
instructions. It is sufficient to require that all truncated bits of both
operands are zeros: https://alive2.llvm.org/ce/z/yiithn
(`urem` case is identical).
Differential Revision: https://reviews.llvm.org/D109515
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index e3a785aee1570..1a6818e74784d 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -65,6 +65,8 @@ static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
Ops.push_back(I->getOperand(0));
Ops.push_back(I->getOperand(1));
break;
@@ -134,6 +136,8 @@ bool TruncInstCombine::buildTruncExpressionDag() {
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
case Instruction::Select: {
SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);
@@ -143,7 +147,7 @@ bool TruncInstCombine::buildTruncExpressionDag() {
default:
// TODO: Can handle more cases here:
// 1. shufflevector, extractelement, insertelement
- // 2. udiv, urem
+ // 2. sdiv, srem
// 3. phi node(and loop handling)
// ...
return false;
@@ -306,6 +310,18 @@ Type *TruncInstCombine::getBestTruncatedType() {
return nullptr;
Itr.second.MinBitWidth = MinBitWidth;
}
+ if (I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::URem) {
+ unsigned MinBitWidth = 0;
+ for (const auto &Op : I->operands()) {
+ KnownBits Known = computeKnownBits(Op);
+ MinBitWidth =
+ std::max(Known.getMaxValue().getActiveBits(), MinBitWidth);
+ if (MinBitWidth >= OrigBitWidth)
+ return nullptr;
+ }
+ Itr.second.MinBitWidth = MinBitWidth;
+ }
}
// Calculate minimum allowed bit-width allowed for shrinking the currently
@@ -397,7 +413,9 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
- case Instruction::AShr: {
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem: {
Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
index caee7e5963012..8b8b69a9fbe98 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
@@ -3,10 +3,9 @@
define i16 @udiv_one_arg(i8 %x) {
; CHECK-LABEL: @udiv_one_arg(
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ZEXT]], 42
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[DIV]] to i16
-; CHECK-NEXT: ret i16 [[TRUNC]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT: [[DIV:%.*]] = udiv i16 [[ZEXT]], 42
+; CHECK-NEXT: ret i16 [[DIV]]
;
%zext = zext i8 %x to i32
%div = udiv i32 %zext, 42
@@ -16,11 +15,8 @@ define i16 @udiv_one_arg(i8 %x) {
define i16 @udiv_two_args(i16 %x, i16 %y) {
; CHECK-LABEL: @udiv_two_args(
-; CHECK-NEXT: [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[ZEXTY:%.*]] = zext i16 [[Y:%.*]] to i32
-; CHECK-NEXT: [[I0:%.*]] = udiv i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT: [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT: ret i16 [[R]]
+; CHECK-NEXT: [[I0:%.*]] = udiv i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i16 [[I0]]
;
%zextx = zext i16 %x to i32
%zexty = zext i16 %y to i32
@@ -29,6 +25,7 @@ define i16 @udiv_two_args(i16 %x, i16 %y) {
ret i16 %r
}
+; Negative test
define i16 @udiv_big_const(i8 %x) {
; CHECK-LABEL: @udiv_big_const(
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
@@ -44,10 +41,9 @@ define i16 @udiv_big_const(i8 %x) {
define <2 x i16> @udiv_vector(<2 x i8> %x) {
; CHECK-LABEL: @udiv_vector(
-; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT: [[S:%.*]] = udiv <2 x i32> [[Z]], <i32 4, i32 10>
-; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[S]] to <2 x i16>
-; CHECK-NEXT: ret <2 x i16> [[T]]
+; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT: [[S:%.*]] = udiv <2 x i16> [[Z]], <i16 4, i16 10>
+; CHECK-NEXT: ret <2 x i16> [[S]]
;
%z = zext <2 x i8> %x to <2 x i32>
%s = udiv <2 x i32> %z, <i32 4, i32 10>
@@ -55,6 +51,7 @@ define <2 x i16> @udiv_vector(<2 x i8> %x) {
ret <2 x i16> %t
}
+; Negative test: can only fold to <2 x i16>, requiring new vector type
define <2 x i8> @udiv_vector_need_new_vector_type(<2 x i8> %x) {
; CHECK-LABEL: @udiv_vector_need_new_vector_type(
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -68,6 +65,7 @@ define <2 x i8> @udiv_vector_need_new_vector_type(<2 x i8> %x) {
ret <2 x i8> %t
}
+; Negative test
define <2 x i16> @udiv_vector_big_const(<2 x i8> %x) {
; CHECK-LABEL: @udiv_vector_big_const(
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -83,11 +81,8 @@ define <2 x i16> @udiv_vector_big_const(<2 x i8> %x) {
define i16 @udiv_exact(i16 %x, i16 %y) {
; CHECK-LABEL: @udiv_exact(
-; CHECK-NEXT: [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[ZEXTY:%.*]] = zext i16 [[Y:%.*]] to i32
-; CHECK-NEXT: [[I0:%.*]] = udiv exact i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT: [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT: ret i16 [[R]]
+; CHECK-NEXT: [[I0:%.*]] = udiv exact i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i16 [[I0]]
;
%zextx = zext i16 %x to i32
%zexty = zext i16 %y to i32
@@ -99,10 +94,9 @@ define i16 @udiv_exact(i16 %x, i16 %y) {
define i16 @urem_one_arg(i8 %x) {
; CHECK-LABEL: @urem_one_arg(
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT: [[DIV:%.*]] = urem i32 [[ZEXT]], 42
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[DIV]] to i16
-; CHECK-NEXT: ret i16 [[TRUNC]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT: [[DIV:%.*]] = urem i16 [[ZEXT]], 42
+; CHECK-NEXT: ret i16 [[DIV]]
;
%zext = zext i8 %x to i32
%div = urem i32 %zext, 42
@@ -112,11 +106,8 @@ define i16 @urem_one_arg(i8 %x) {
define i16 @urem_two_args(i16 %x, i16 %y) {
; CHECK-LABEL: @urem_two_args(
-; CHECK-NEXT: [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[ZEXTY:%.*]] = zext i16 [[Y:%.*]] to i32
-; CHECK-NEXT: [[I0:%.*]] = urem i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT: [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT: ret i16 [[R]]
+; CHECK-NEXT: [[I0:%.*]] = urem i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i16 [[I0]]
;
%zextx = zext i16 %x to i32
%zexty = zext i16 %y to i32
@@ -125,6 +116,7 @@ define i16 @urem_two_args(i16 %x, i16 %y) {
ret i16 %r
}
+; Negative test
define i16 @urem_big_const(i8 %x) {
; CHECK-LABEL: @urem_big_const(
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
@@ -140,10 +132,9 @@ define i16 @urem_big_const(i8 %x) {
define <2 x i16> @urem_vector(<2 x i8> %x) {
; CHECK-LABEL: @urem_vector(
-; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT: [[S:%.*]] = urem <2 x i32> [[Z]], <i32 4, i32 10>
-; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[S]] to <2 x i16>
-; CHECK-NEXT: ret <2 x i16> [[T]]
+; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT: [[S:%.*]] = urem <2 x i16> [[Z]], <i16 4, i16 10>
+; CHECK-NEXT: ret <2 x i16> [[S]]
;
%z = zext <2 x i8> %x to <2 x i32>
%s = urem <2 x i32> %z, <i32 4, i32 10>
@@ -151,6 +142,7 @@ define <2 x i16> @urem_vector(<2 x i8> %x) {
ret <2 x i16> %t
}
+; Negative test: can only fold to <2 x i16>, requiring new vector type
define <2 x i8> @urem_vector_need_new_vector_type(<2 x i8> %x) {
; CHECK-LABEL: @urem_vector_need_new_vector_type(
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -164,6 +156,7 @@ define <2 x i8> @urem_vector_need_new_vector_type(<2 x i8> %x) {
ret <2 x i8> %t
}
+; Negative test
define <2 x i16> @urem_vector_big_const(<2 x i8> %x) {
; CHECK-LABEL: @urem_vector_big_const(
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
More information about the llvm-commits
mailing list