[llvm] perf/goldsteinn/reassos nuw nsw mul (PR #97040)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 04:34:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
- **[Reassociate] Add tests for preserving `nuw` and `nsw` on `mul` chains; NFC**
- **[Reassociate] Preserve `nuw` and `nsw` on `mul` chains**
---
Full diff: https://github.com/llvm/llvm-project/pull/97040.diff
3 Files Affected:
- (modified) llvm/include/llvm/Transforms/Scalar/Reassociate.h (+4-1)
- (modified) llvm/lib/Transforms/Scalar/Reassociate.cpp (+13-1)
- (modified) llvm/test/Transforms/Reassociate/reassoc-mul-nuw.ll (+156-2)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
index 84d72df6fc4d8..23d619587cce2 100644
--- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
@@ -67,10 +67,13 @@ struct OverflowTracking {
bool HasNUW;
bool HasNSW;
bool AllKnownNonNegative;
+ bool AllKnownNonZero;
// Note: AllKnownNonNegative can be true in a case where one of the operands
// is negative, but one the operators is not NSW. AllKnownNonNegative should
// not be used independently of HasNSW
- OverflowTracking() : HasNUW(true), HasNSW(true), AllKnownNonNegative(true) {}
+ OverflowTracking()
+ : HasNUW(true), HasNSW(true), AllKnownNonNegative(true),
+ AllKnownNonZero(true) {}
};
class XorOpnd;
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ce7b95af24291..6b6418b2766c7 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -539,6 +539,16 @@ static bool LinearizeExprTree(Instruction *I,
Ops.push_back(std::make_pair(V, Weight));
if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
+ else if (Opcode == Instruction::Mul) {
+ // To preserve NUW we need all inputs non-zero.
+ // To preserve NSW we need all inputs strictly positive.
+ if (Flags.AllKnownNonZero &&
+ (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
+ Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
+ if (Flags.HasNSW && Flags.AllKnownNonNegative)
+ Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
+ }
+ }
}
// For nilpotent operations or addition there may be no operands, for example
@@ -725,7 +735,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// Note that it doesn't hold for mul if one of the operands is zero.
// TODO: We can preserve NUW flag if we prove that all mul operands
// are non-zero.
- if (ExpressionChangedStart->getOpcode() == Instruction::Add) {
+ if (ExpressionChangedStart->getOpcode() == Instruction::Add ||
+ (ExpressionChangedStart->getOpcode() == Instruction::Mul &&
+ Flags.AllKnownNonZero)) {
if (Flags.HasNUW)
ExpressionChangedStart->setHasNoUnsignedWrap();
if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW))
diff --git a/llvm/test/Transforms/Reassociate/reassoc-mul-nuw.ll b/llvm/test/Transforms/Reassociate/reassoc-mul-nuw.ll
index 682fad8d222b7..7479e650491f8 100644
--- a/llvm/test/Transforms/Reassociate/reassoc-mul-nuw.ll
+++ b/llvm/test/Transforms/Reassociate/reassoc-mul-nuw.ll
@@ -21,8 +21,8 @@ define i4 @nuw_preserve_non_zero(i4 %a, i4 %b, i4 %c) {
; CHECK-NEXT: [[A0:%.*]] = add nuw i4 [[A]], 1
; CHECK-NEXT: [[B0:%.*]] = add nuw i4 [[B]], 1
; CHECK-NEXT: [[C0:%.*]] = add nuw i4 [[C]], 1
-; CHECK-NEXT: [[V0:%.*]] = mul i4 [[B0]], [[A0]]
-; CHECK-NEXT: [[V1:%.*]] = mul i4 [[V0]], [[C0]]
+; CHECK-NEXT: [[V0:%.*]] = mul nuw i4 [[B0]], [[A0]]
+; CHECK-NEXT: [[V1:%.*]] = mul nuw i4 [[V0]], [[C0]]
; CHECK-NEXT: ret i4 [[V1]]
;
%a0 = add nuw i4 %a, 1
@@ -32,3 +32,157 @@ define i4 @nuw_preserve_non_zero(i4 %a, i4 %b, i4 %c) {
%v1 = mul nuw i4 %v0, %b0
ret i4 %v1
}
+
+define i4 @re_order_mul_nuw(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_nuw(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0:%.*]] = add nuw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2:%.*]] = add nuw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[MUL_B:%.*]] = mul nuw i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul nuw i4 [[MUL_B]], [[X2]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul nuw i4 [[MUL_A]], [[X3]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0 = add nuw i4 %xx0, 1
+ %x1 = add nuw i4 %xx1, 1
+ %x2 = add nuw i4 %xx2, 1
+ %x3 = add nuw i4 %xx3, 1
+ %mul_a = mul nuw i4 %x0, %x1
+ %mul_b = mul nuw i4 %x2, %x3
+ %mul_c = mul nuw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
+
+define i4 @re_order_mul_nuw_fail_maybe_zero(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_nuw_fail_maybe_zero(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0:%.*]] = add nsw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2:%.*]] = add nuw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[MUL_B:%.*]] = mul i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul i4 [[MUL_B]], [[X2]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul i4 [[MUL_A]], [[X3]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0 = add nsw i4 %xx0, 1
+ %x1 = add nuw i4 %xx1, 1
+ %x2 = add nuw i4 %xx2, 1
+ %x3 = add nuw i4 %xx3, 1
+ %mul_a = mul nuw i4 %x0, %x1
+ %mul_b = mul nuw i4 %x2, %x3
+ %mul_c = mul nuw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
+
+define i4 @re_order_mul_nsw(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_nsw(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0_NZ:%.*]] = add nuw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1_NZ:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2_NZ:%.*]] = add nuw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3_NZ:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[X0:%.*]] = call i4 @llvm.umin.i4(i4 [[X0_NZ]], i4 7)
+; CHECK-NEXT: [[X1:%.*]] = call i4 @llvm.umin.i4(i4 [[X1_NZ]], i4 7)
+; CHECK-NEXT: [[X2:%.*]] = call i4 @llvm.umin.i4(i4 [[X2_NZ]], i4 7)
+; CHECK-NEXT: [[X3:%.*]] = call i4 @llvm.umin.i4(i4 [[X3_NZ]], i4 7)
+; CHECK-NEXT: [[MUL_B:%.*]] = mul nsw i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul nsw i4 [[MUL_B]], [[X2]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul nsw i4 [[MUL_A]], [[X3]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0_nz = add nuw i4 %xx0, 1
+ %x1_nz = add nuw i4 %xx1, 1
+ %x2_nz = add nuw i4 %xx2, 1
+ %x3_nz = add nuw i4 %xx3, 1
+ %x0 = call i4 @llvm.umin.i4(i4 %x0_nz, i4 7)
+ %x1 = call i4 @llvm.umin.i4(i4 %x1_nz, i4 7)
+ %x2 = call i4 @llvm.umin.i4(i4 %x2_nz, i4 7)
+ %x3 = call i4 @llvm.umin.i4(i4 %x3_nz, i4 7)
+ %mul_a = mul nsw i4 %x0, %x1
+ %mul_b = mul nsw i4 %x2, %x3
+ %mul_c = mul nsw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
+
+define i4 @re_order_mul_nsw_nuw(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_nsw_nuw(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0:%.*]] = add nuw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2:%.*]] = add nuw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[MUL_B:%.*]] = mul nuw nsw i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul nuw nsw i4 [[MUL_B]], [[X2]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul nuw nsw i4 [[MUL_A]], [[X3]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0 = add nuw i4 %xx0, 1
+ %x1 = add nuw i4 %xx1, 1
+ %x2 = add nuw i4 %xx2, 1
+ %x3 = add nuw i4 %xx3, 1
+ %mul_a = mul nuw nsw i4 %x0, %x1
+ %mul_b = mul nuw nsw i4 %x2, %x3
+ %mul_c = mul nuw nsw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
+
+define i4 @re_order_mul_fail_maybe_neg(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_fail_maybe_neg(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0_NZ:%.*]] = add nuw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1_NZ:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2_NZ:%.*]] = add nuw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[X0:%.*]] = call i4 @llvm.umin.i4(i4 [[X0_NZ]], i4 7)
+; CHECK-NEXT: [[X1:%.*]] = call i4 @llvm.umin.i4(i4 [[X1_NZ]], i4 7)
+; CHECK-NEXT: [[X2:%.*]] = call i4 @llvm.umin.i4(i4 [[X2_NZ]], i4 7)
+; CHECK-NEXT: [[MUL_B:%.*]] = mul i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul i4 [[MUL_B]], [[X3]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul i4 [[MUL_A]], [[X2]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0_nz = add nuw i4 %xx0, 1
+ %x1_nz = add nuw i4 %xx1, 1
+ %x2_nz = add nuw i4 %xx2, 1
+ %x3 = add nuw i4 %xx3, 1
+ %x0 = call i4 @llvm.umin.i4(i4 %x0_nz, i4 7)
+ %x1 = call i4 @llvm.umin.i4(i4 %x1_nz, i4 7)
+ %x2 = call i4 @llvm.umin.i4(i4 %x2_nz, i4 7)
+ %mul_a = mul nsw i4 %x0, %x1
+ %mul_b = mul nsw i4 %x2, %x3
+ %mul_c = mul nsw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
+
+define i4 @re_order_mul_nsw_fail_maybe_z(i4 %xx0, i4 %xx1, i4 %xx2, i4 %xx3) {
+; CHECK-LABEL: define i4 @re_order_mul_nsw_fail_maybe_z(
+; CHECK-SAME: i4 [[XX0:%.*]], i4 [[XX1:%.*]], i4 [[XX2:%.*]], i4 [[XX3:%.*]]) {
+; CHECK-NEXT: [[X0_NZ:%.*]] = add nuw i4 [[XX0]], 1
+; CHECK-NEXT: [[X1_NZ:%.*]] = add nuw i4 [[XX1]], 1
+; CHECK-NEXT: [[X2_NZ:%.*]] = add nsw i4 [[XX2]], 1
+; CHECK-NEXT: [[X3_NZ:%.*]] = add nuw i4 [[XX3]], 1
+; CHECK-NEXT: [[X0:%.*]] = call i4 @llvm.umin.i4(i4 [[X0_NZ]], i4 7)
+; CHECK-NEXT: [[X1:%.*]] = call i4 @llvm.umin.i4(i4 [[X1_NZ]], i4 7)
+; CHECK-NEXT: [[X2:%.*]] = call i4 @llvm.umin.i4(i4 [[X2_NZ]], i4 7)
+; CHECK-NEXT: [[X3:%.*]] = call i4 @llvm.umin.i4(i4 [[X3_NZ]], i4 7)
+; CHECK-NEXT: [[MUL_B:%.*]] = mul i4 [[X1]], [[X0]]
+; CHECK-NEXT: [[MUL_A:%.*]] = mul i4 [[MUL_B]], [[X2]]
+; CHECK-NEXT: [[MUL_C:%.*]] = mul i4 [[MUL_A]], [[X3]]
+; CHECK-NEXT: ret i4 [[MUL_C]]
+;
+ %x0_nz = add nuw i4 %xx0, 1
+ %x1_nz = add nuw i4 %xx1, 1
+ %x2_nz = add nsw i4 %xx2, 1
+ %x3_nz = add nuw i4 %xx3, 1
+ %x0 = call i4 @llvm.umin.i4(i4 %x0_nz, i4 7)
+ %x1 = call i4 @llvm.umin.i4(i4 %x1_nz, i4 7)
+ %x2 = call i4 @llvm.umin.i4(i4 %x2_nz, i4 7)
+ %x3 = call i4 @llvm.umin.i4(i4 %x3_nz, i4 7)
+ %mul_a = mul nsw i4 %x0, %x1
+ %mul_b = mul nsw i4 %x2, %x3
+ %mul_c = mul nsw i4 %mul_a, %mul_b
+ ret i4 %mul_c
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/97040
More information about the llvm-commits mailing list