[llvm] [TargetLowering][SelectionDAG] Exploit nneg Flag in UINT_TO_FP (PR #108931)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 07:35:01 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/108931
>From 84d910d63bf4a22ebd051f717837fd6892450e7a Mon Sep 17 00:00:00 2001
From: Michael Marjieh <michael.marjieh at mobileye.com>
Date: Tue, 17 Sep 2024 08:49:00 +0300
Subject: [PATCH 1/3] [TargetLowering][SelectionDAG] Exploit nneg Flag in
UINT_TO_FP
1. Propagate the nneg flag in WidenVecRes
2. Use SINT_TO_FP in expandUINT_TO_FP when possible.
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 10 +++++-----
.../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +++++++++++++------
llvm/test/CodeGen/VE/Scalar/cast.ll | 10 ++++++++++
3 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9674de77388386..e0b47e1045b965 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5208,7 +5208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (N->getOpcode() == ISD::ZERO_EXTEND &&
getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
- WidenVT.getScalarSizeInBits()) {
+ WidenVT.getScalarSizeInBits()) {
InOp = ZExtPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
@@ -5225,7 +5225,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
InVTEC = InVT.getVectorElementCount();
if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags);
if (N->getNumOperands() == 3) {
assert(N->isVPOpcode() && "Expected VP opcode");
SDValue Mask =
@@ -5261,7 +5261,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags);
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
@@ -5270,7 +5270,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags);
return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
@@ -5285,7 +5285,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getVectorIdxConstant(i, DL));
if (N->getNumOperands() == 1)
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags);
else
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 793b8ff164c233..87abda7ca2f990 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8364,18 +8364,26 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
- SDValue &Chain,
- SelectionDAG &DAG) const {
+ SDValue &Chain, SelectionDAG &DAG) const {
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ // If the input is known to be non-negative and SINT_TO_FP is legal then use
+ // it.
+ if (Node->getFlags().hasNonNeg() &&
+ isOperationLegalOrCustom(ISD::SINT_TO_FP, DstVT)) {
+ Result =
+ DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
+ return true;
+ }
+
// This transform is not correct for converting 0 when rounding mode is set
// to round toward negative infinity which will produce -0.0. So disable under
// strictfp.
if (Node->isStrictFPOpcode())
return false;
- SDValue Src = Node->getOperand(0);
- EVT SrcVT = Src.getValueType();
- EVT DstVT = Node->getValueType(0);
-
if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
return false;
diff --git a/llvm/test/CodeGen/VE/Scalar/cast.ll b/llvm/test/CodeGen/VE/Scalar/cast.ll
index 44782b342f4d0f..9253b5591b351d 100644
--- a/llvm/test/CodeGen/VE/Scalar/cast.ll
+++ b/llvm/test/CodeGen/VE/Scalar/cast.ll
@@ -568,6 +568,16 @@ define float @ull2f(i64 %x) {
ret float %r
}
+define float @ull2f_nneg(i64 %x) {
+; CHECK-LABEL: ull2f_nneg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cvt.d.l %s0, %s0
+; CHECK-NEXT: cvt.s.d %s0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %r = uitofp nneg i64 %x to float
+ ret float %r
+}
+
define double @ull2d(i64 %x) {
; CHECK-LABEL: ull2d:
; CHECK: # %bb.0:
>From f822d0cd7380f982c60c731184be32232b00f4c1 Mon Sep 17 00:00:00 2001
From: Michael Marjieh <michael.marjieh at mobileye.com>
Date: Wed, 25 Sep 2024 18:36:12 +0300
Subject: [PATCH 2/3] Fix One More Comment
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 87abda7ca2f990..102e4a9b5fd210 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8372,7 +8372,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// If the input is known to be non-negative and SINT_TO_FP is legal then use
// it.
if (Node->getFlags().hasNonNeg() &&
- isOperationLegalOrCustom(ISD::SINT_TO_FP, DstVT)) {
+ isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
Result =
DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
return true;
>From 50c4ae6367027cda55d3945ea9e63a172e3440f2 Mon Sep 17 00:00:00 2001
From: Michael Marjieh <michael.marjieh at mobileye.com>
Date: Mon, 14 Oct 2024 09:23:39 +0300
Subject: [PATCH 3/3] Add Test and Fix Strict Ops
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 23 +++++----
llvm/test/CodeGen/VE/Scalar/cast.ll | 28 ++++++++++
llvm/test/CodeGen/X86/avx512-cvt.ll | 51 ++++++++++++++++---
3 files changed, 83 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 102e4a9b5fd210..40f030d7b936f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8365,6 +8365,12 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain, SelectionDAG &DAG) const {
+ // This transform is not correct for converting 0 when rounding mode is set
+ // to round toward negative infinity which will produce -0.0. So disable
+ // under strictfp.
+ if (Node->isStrictFPOpcode())
+ return false;
+
SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -8378,16 +8384,11 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return true;
}
- // This transform is not correct for converting 0 when rounding mode is set
- // to round toward negative infinity which will produce -0.0. So disable under
- // strictfp.
- if (Node->isStrictFPOpcode())
- return false;
-
if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
return false;
- // Only expand vector types if we have the appropriate vector bit operations.
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
!isOperationLegalOrCustom(ISD::FADD, DstVT) ||
!isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
@@ -8401,8 +8402,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation performs rounding
// correctly in all rounding modes with the exception of converting 0
- // when rounding toward negative infinity. In that case the fsub will produce
- // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
+ // when rounding toward negative infinity. In that case the fsub will
+ // produce -0.0. This will be added to +0.0 and produce -0.0 which is
+ // incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
@@ -8416,8 +8418,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- SDValue HiSub =
- DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
diff --git a/llvm/test/CodeGen/VE/Scalar/cast.ll b/llvm/test/CodeGen/VE/Scalar/cast.ll
index 9253b5591b351d..6f6c93a1e639fb 100644
--- a/llvm/test/CodeGen/VE/Scalar/cast.ll
+++ b/llvm/test/CodeGen/VE/Scalar/cast.ll
@@ -578,6 +578,34 @@ define float @ull2f_nneg(i64 %x) {
ret float %r
}
+define float @ull2f_strict(i32 %x) {
+; CHECK-LABEL: ull2f_strict:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.l %s11, -16, %s11
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB58_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB58_2:
+; CHECK-NEXT: lea %s1, 1127219200
+; CHECK-NEXT: stl %s1, 12(, %s11)
+; CHECK-NEXT: stl %s0, 8(, %s11)
+; CHECK-NEXT: ld %s0, 8(, %s11)
+; CHECK-NEXT: lea.sl %s1, 1127219200
+; CHECK-NEXT: fsub.d %s0, %s0, %s1
+; CHECK-NEXT: cvt.s.d %s0, %s0
+; CHECK-NEXT: adds.l %s11, 16, %s11
+; CHECK-NEXT: b.l.t (, %s10)
+ %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret float %val
+}
+
define double @ull2d(i64 %x) {
; CHECK-LABEL: ull2d:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index ff7f7b39c6c8b1..a78d97782e6a3d 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -312,11 +312,46 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
ret <4 x float> %b
}
+define <4 x float> @ulto4f32_nneg(<4 x i64> %a) {
+; NODQ-LABEL: ulto4f32_nneg:
+; NODQ: # %bb.0:
+; NODQ-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; NODQ-NEXT: vmovq %xmm0, %rax
+; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
+; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; NODQ-NEXT: vmovq %xmm0, %rax
+; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; NODQ-NEXT: vpextrq $1, %xmm0, %rax
+; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; NODQ-NEXT: vzeroupper
+; NODQ-NEXT: retq
+;
+; VLDQ-LABEL: ulto4f32_nneg:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
+; VLDQ-NEXT: vzeroupper
+; VLDQ-NEXT: retq
+;
+; DQNOVL-LABEL: ulto4f32_nneg:
+; DQNOVL: # %bb.0:
+; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
+; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; DQNOVL-NEXT: vzeroupper
+; DQNOVL-NEXT: retq
+ %b = uitofp nneg <4 x i64> %a to <4 x float>
+ ret <4 x float> %b
+}
+
define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ: # %bb.0:
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
@@ -342,14 +377,14 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
-; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4
+; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
-; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3
+; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
@@ -1483,7 +1518,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT: retq
;
@@ -1564,7 +1599,7 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
; NODQ-NEXT: kunpckbw %k0, %k1, %k1
-; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NODQ-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
@@ -1603,7 +1638,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; NOVLDQ-NEXT: retq
;
@@ -1864,7 +1899,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT: retq
@@ -1894,7 +1929,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
More information about the llvm-commits
mailing list