[llvm] 8561283 - [DAG] Unroll operand when its type is illegal for ldexp.
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 15 18:44:09 PDT 2023
Author: tianleli
Date: 2023-06-16T09:44:04+08:00
New Revision: 8561283b10f79b8bdb0772c401cf9b585f5f9bbb
URL: https://github.com/llvm/llvm-project/commit/8561283b10f79b8bdb0772c401cf9b585f5f9bbb
DIFF: https://github.com/llvm/llvm-project/commit/8561283b10f79b8bdb0772c401cf9b585f5f9bbb.diff
LOG: [DAG] Unroll operand when its type is illegal for ldexp.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D152997
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/X86/ldexp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b7280a3e4ef0d..ece1201e0459f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1009,7 +1009,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
- SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_UnrollVectorOp(SDNode *N);
SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1892f4cffb4b2..21d795220e5bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5882,7 +5882,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
- case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
@@ -6031,7 +6032,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
-SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
// the second input be widened as needed later.
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index bbf0f97d8eace..85ba8ef53416d 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -674,6 +674,160 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
ret <4 x float> %1
}
+define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) {
+; X64-LABEL: ldexp_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: subq $56, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 64
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT: movd %xmm1, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X64-NEXT: # xmm1 = mem[1,1,1,1]
+; X64-NEXT: movd %xmm1, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: addq $56, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; WIN32-LABEL: ldexp_v2f64:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %esi
+; WIN32-NEXT: subl $28, %esp
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fxch %st(1)
+; WIN32-NEXT: addl $28, %esp
+; WIN32-NEXT: popl %esi
+; WIN32-NEXT: retl
+ %1 = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %val, <2 x i32> %exp)
+ ret <2 x double> %1
+}
+
+define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) {
+; X64-LABEL: ldexp_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: subq $72, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 96
+; X64-NEXT: .cfi_offset %rbx, -24
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
+; X64-NEXT: movd %xmm1, %ebx
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3]
+; X64-NEXT: movd %xmm1, %ebp
+; X64-NEXT: movd %xmm2, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: pshufd $85, (%rsp), %xmm1 # 16-byte Folded Reload
+; X64-NEXT: # xmm1 = mem[1,1,1,1]
+; X64-NEXT: movd %xmm1, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: movl %ebp, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq ldexp at PLT
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
+; X64-NEXT: # xmm1 = xmm1[0],mem[0]
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: addq $72, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; WIN32-LABEL: ldexp_v4f64:
+; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %ebp
+; WIN32-NEXT: pushl %ebx
+; WIN32-NEXT: pushl %edi
+; WIN32-NEXT: pushl %esi
+; WIN32-NEXT: subl $44, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: fldl {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl 24(%esi)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl 16(%esi)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl 8(%esi)
+; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; WIN32-NEXT: fstpl (%esi)
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: addl $44, %esp
+; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
+; WIN32-NEXT: popl %ebx
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+ %1 = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %val, <4 x i32> %exp)
+ ret <4 x double> %1
+}
+
define half @ldexp_f16(half %arg0, i32 %arg1) {
; X64-LABEL: ldexp_f16:
; X64: # %bb.0:
@@ -699,66 +853,66 @@ define half @ldexp_f16(half %arg0, i32 %arg1) {
; WIN32-NEXT: movl %eax, (%esp)
; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D
; WIN32-NEXT: movl %edi, %esi
-; WIN32-NEXT: jl LBB4_2
+; WIN32-NEXT: jl LBB6_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl $381, %esi # imm = 0x17D
-; WIN32-NEXT: LBB4_2:
+; WIN32-NEXT: LBB6_2:
; WIN32-NEXT: addl $-254, %esi
; WIN32-NEXT: calll ___gnu_h2f_ieee
; WIN32-NEXT: leal -127(%edi), %eax
; WIN32-NEXT: cmpl $255, %edi
-; WIN32-NEXT: jae LBB4_4
+; WIN32-NEXT: jae LBB6_4
; WIN32-NEXT: # %bb.3:
; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: LBB4_4:
+; WIN32-NEXT: LBB6_4:
; WIN32-NEXT: flds __real at 7f000000
; WIN32-NEXT: fld %st(1)
; WIN32-NEXT: fmul %st(1), %st
; WIN32-NEXT: fmul %st, %st(1)
-; WIN32-NEXT: jae LBB4_6
+; WIN32-NEXT: jae LBB6_6
; WIN32-NEXT: # %bb.5:
; WIN32-NEXT: fstp %st(1)
; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB4_6:
+; WIN32-NEXT: LBB6_6:
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7
; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: jge LBB4_8
+; WIN32-NEXT: jge LBB6_8
; WIN32-NEXT: # %bb.7:
; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6
-; WIN32-NEXT: LBB4_8:
+; WIN32-NEXT: LBB6_8:
; WIN32-NEXT: flds __real at 0c800000
; WIN32-NEXT: fld %st(2)
; WIN32-NEXT: fmul %st(1), %st
; WIN32-NEXT: fmul %st, %st(1)
; WIN32-NEXT: cmpl $-228, %edi
-; WIN32-NEXT: jb LBB4_9
+; WIN32-NEXT: jb LBB6_9
; WIN32-NEXT: # %bb.10:
; WIN32-NEXT: fstp %st(1)
; WIN32-NEXT: leal 102(%edi), %eax
; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jge LBB4_12
-; WIN32-NEXT: jmp LBB4_13
-; WIN32-NEXT: LBB4_9:
+; WIN32-NEXT: jge LBB6_12
+; WIN32-NEXT: jmp LBB6_13
+; WIN32-NEXT: LBB6_9:
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: addl $204, %eax
; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jl LBB4_13
-; WIN32-NEXT: LBB4_12:
+; WIN32-NEXT: jl LBB6_13
+; WIN32-NEXT: LBB6_12:
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: fldz
; WIN32-NEXT: fxch %st(2)
-; WIN32-NEXT: LBB4_13:
+; WIN32-NEXT: LBB6_13:
; WIN32-NEXT: fstp %st(2)
; WIN32-NEXT: cmpl $127, %edi
-; WIN32-NEXT: jg LBB4_15
+; WIN32-NEXT: jg LBB6_15
; WIN32-NEXT: # %bb.14:
; WIN32-NEXT: fstp %st(0)
; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: fldz
; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB4_15:
+; WIN32-NEXT: LBB6_15:
; WIN32-NEXT: fstp %st(1)
; WIN32-NEXT: shll $23, %esi
; WIN32-NEXT: addl $1065353216, %esi # imm = 0x3F800000
@@ -778,6 +932,8 @@ declare double @llvm.ldexp.f64.i32(double, i32) #0
declare float @llvm.ldexp.f32.i32(float, i32) #0
declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
+declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0
+declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>) #0
declare half @llvm.ldexp.f16.i32(half, i32) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
More information about the llvm-commits
mailing list