[llvm] 2bfa7d0 - [InstCombine] Fold `fmul X, -0.0` into `copysign(0.0, -X)` (#85772)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 06:48:13 PDT 2024
Author: Yingwei Zheng
Date: 2024-03-21T21:48:10+08:00
New Revision: 2bfa7d0e1691dcff095c602a0387a6d13213dc91
URL: https://github.com/llvm/llvm-project/commit/2bfa7d0e1691dcff095c602a0387a6d13213dc91
DIFF: https://github.com/llvm/llvm-project/commit/2bfa7d0e1691dcff095c602a0387a6d13213dc91.diff
LOG: [InstCombine] Fold `fmul X, -0.0` into `copysign(0.0, -X)` (#85772)
`fneg + copysign` is better than fmul for analysis/codegen.
godbolt: https://godbolt.org/z/eEs6dGd1G
Alive2: https://alive2.llvm.org/ce/z/K3M5BA
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
llvm/test/Transforms/InstCombine/binop-itofp.ll
llvm/test/Transforms/InstCombine/fmul.ll
llvm/test/Transforms/InstCombine/fpcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 9d4c271f990d19..6e05fd8fb4d6d1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -814,8 +814,19 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (match(Op1, m_SpecificFP(-1.0)))
return UnaryOperator::CreateFNegFMF(Op0, &I);
- // With no-nans: X * 0.0 --> copysign(0.0, X)
- if (I.hasNoNaNs() && match(Op1, m_PosZeroFP())) {
+ // With no-nans/no-infs:
+ // X * 0.0 --> copysign(0.0, X)
+ // X * -0.0 --> copysign(0.0, -X)
+ const APFloat *FPC;
+ if (match(Op1, m_APFloatAllowUndef(FPC)) && FPC->isZero() &&
+ ((I.hasNoInfs() &&
+ isKnownNeverNaN(Op0, /*Depth=*/0, SQ.getWithInstruction(&I))) ||
+ isKnownNeverNaN(&I, /*Depth=*/0, SQ.getWithInstruction(&I)))) {
+ if (FPC->isNegative())
+ Op0 = Builder.CreateFNegFMF(Op0, &I);
+ Op1 = Constant::replaceUndefsWith(
+ cast<Constant>(Op1),
+ ConstantFP::get(Op1->getType()->getScalarType(), *FPC));
CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
{I.getType()}, {Op1, Op0}, &I);
return replaceInstUsesWith(I, CopySign);
diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll
index 82cdb3ce6bee61..cd9ec1e59203ff 100644
--- a/llvm/test/Transforms/InstCombine/binop-itofp.ll
+++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -1012,7 +1012,7 @@ define float @missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], 0.000000e+00
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret float [[MUL3_I_I]]
;
@@ -1031,7 +1031,7 @@ define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 %
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], zeroinitializer
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
@@ -1050,7 +1050,8 @@ define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345)
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], -0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[CONV1_I]]
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret float [[MUL3_I_I]]
;
@@ -1069,7 +1070,7 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float 0.000000e+00>
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
@@ -1111,7 +1112,8 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(i1 %c
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float -0.000000e+00>
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[CONV1_I]]
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[TMP1]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 96e57939d28505..f6435f0032891e 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -1250,7 +1250,7 @@ define half @mul_zero_nnan(half %x) {
define <2 x float> @mul_zero_nnan_vec_poison(<2 x float> %x) {
; CHECK-LABEL: @mul_zero_nnan_vec_poison(
-; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> [[X:%.*]])
+; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[X:%.*]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
%r = fmul nnan <2 x float> %x, <float 0.0, float poison>
@@ -1268,13 +1268,104 @@ define half @mul_zero(half %x) {
ret half %r
}
-; TODO: This could be fneg+copysign.
-
define half @mul_negzero_nnan(half %x) {
; CHECK-LABEL: @mul_negzero_nnan(
-; CHECK-NEXT: [[R:%.*]] = fmul nnan half [[X:%.*]], 0xH8000
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan half [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call nnan half @llvm.copysign.f16(half 0xH0000, half [[TMP1]])
; CHECK-NEXT: ret half [[R]]
;
%r = fmul nnan half %x, -0.0
ret half %r
}
+
+define float @mul_pos_zero_nnan_ninf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan_ninf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_pos_zero_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = fmul float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_pos_zero_nnan_ninf_fmf(float nofpclass(nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan_ninf_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = call ninf float @llvm.copysign.f32(float 0.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul ninf float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_ninf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, -0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_fmf(float %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call nnan float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul nnan float %a, -0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_ninf_fmf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan ninf float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call nnan ninf float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul nnan ninf float %a, -0.000000e+00
+ ret float %ret
+}
+
+define <3 x float> @mul_neg_zero_nnan_ninf_vec(<3 x float> nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf_vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg <3 x float> [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call <3 x float> @llvm.copysign.v3f32(<3 x float> zeroinitializer, <3 x float> [[TMP0]])
+; CHECK-NEXT: ret <3 x float> [[RET]]
+;
+entry:
+ %ret = fmul <3 x float> %a, <float -0.0, float undef, float poison>
+ ret <3 x float> %ret
+}
+
+define <3 x float> @mul_mixed_zero_nnan_ninf_vec(<3 x float> nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_mixed_zero_nnan_ninf_vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = fmul <3 x float> [[A:%.*]], <float -0.000000e+00, float 0.000000e+00, float poison>
+; CHECK-NEXT: ret <3 x float> [[RET]]
+;
+entry:
+ %ret = fmul <3 x float> %a, <float -0.0, float 0.0, float poison>
+ ret <3 x float> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index 32bfdb52bb5f7e..ac4b88fcddd7ec 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -424,10 +424,7 @@ define i32 @fptosi_select(i1 %cond) {
define i32 @mul_pos_zero_convert(i32 %a) {
; CHECK-LABEL: @mul_pos_zero_convert(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[FP:%.*]] = sitofp i32 [[A:%.*]] to float
-; CHECK-NEXT: [[RET:%.*]] = fmul float [[FP]], 0.000000e+00
-; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[RET]] to i32
-; CHECK-NEXT: ret i32 [[CONV]]
+; CHECK-NEXT: ret i32 0
;
entry:
%fp = sitofp i32 %a to float
More information about the llvm-commits
mailing list