[llvm] 1c27275 - [DAG] Unroll and expand illegal result of LDEXP and POWI instead of widen.
Phoebe Wang via llvm-commits
llvm-commits@lists.llvm.org
Tue Jun 20 23:27:45 PDT 2023
Author: tianleli
Date: 2023-06-21T14:27:39+08:00
New Revision: 1c2727581378677152cfb4909358e6b9e4b9b3ad
URL: https://github.com/llvm/llvm-project/commit/1c2727581378677152cfb4909358e6b9e4b9b3ad
DIFF: https://github.com/llvm/llvm-project/commit/1c2727581378677152cfb4909358e6b9e4b9b3ad.diff
LOG: [DAG] Unroll and expand illegal result of LDEXP and POWI instead of widen.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D153104
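
When the result type of an FLDEXP or FPOWI node is illegal, type legalization previously widened the vector and then expanded the widened node into one scalar libcall per lane, padding lanes included; with this change the node is unrolled first, so libcalls are emitted only for the original lanes. A minimal standalone sketch of the kind of input affected (not part of the patch; the function name and the x86-64/SSE2 target are illustrative, assuming the standard llvm.ldexp intrinsic mangling):

declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>)

define <2 x float> @ldexp_v2f32_example(<2 x float> %val, <2 x i32> %exp) {
  ; <2 x float> is not a legal type on x86-64/SSE2, so the FLDEXP result is
  ; widened to <4 x float>; before this patch that produced four ldexpf
  ; libcalls, afterwards the node is unrolled into two scalar ldexpf calls.
  %r = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
  ret <2 x float> %r
}

The updated ldexp.ll and powi.ll checks below reflect this: the X64 sequences now contain two scalar libcalls instead of four.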
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/PowerPC/ldexp.ll
llvm/test/CodeGen/X86/ldexp.ll
llvm/test/CodeGen/X86/powi.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 21d795220e5bb..1a29aaff1b8c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4087,7 +4087,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLDEXP:
case ISD::FPOWI:
- Res = WidenVecRes_ExpOp(N);
+ if (!unrollExpandedOp())
+ Res = WidenVecRes_ExpOp(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 7a6bab84cbc4a..ed8089b4b303e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -52,56 +52,37 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; CHECK-LABEL: ldexp_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -96(r1)
-; CHECK-NEXT: std r0, 112(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset v28, -64
; CHECK-NEXT: .cfi_offset v29, -48
; CHECK-NEXT: .cfi_offset v30, -32
; CHECK-NEXT: .cfi_offset v31, -16
-; CHECK-NEXT: li r3, 12
-; CHECK-NEXT: xscvspdpn f1, v2
-; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill
+; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: vextuwrx r4, r3, v3
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v3
; CHECK-NEXT: vmr v30, v2
-; CHECK-NEXT: vextuwrx r4, r3, v3
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd vs0, v30
; CHECK-NEXT: li r3, 4
-; CHECK-NEXT: xscpsgndp v29, f1, f1
+; CHECK-NEXT: xscvdpspn v29, f1
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: vextuwrx r4, r3, v31
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-NEXT: xxmrghd vs0, v29, vs1
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: vextuwrx r4, r3, v31
-; CHECK-NEXT: xvcvdpsp v28, vs0
-; CHECK-NEXT: xxsldwi vs0, v30, v30, 3
-; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: bl ldexpf
-; CHECK-NEXT: nop
-; CHECK-NEXT: xxsldwi vs0, v30, v30, 1
-; CHECK-NEXT: xscpsgndp v29, f1, f1
-; CHECK-NEXT: mfvsrwz r4, v31
-; CHECK-NEXT: xscvspdpn f1, vs0
-; CHECK-NEXT: bl ldexpf
-; CHECK-NEXT: nop
-; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; CHECK-NEXT: xxmrghd vs0, vs1, v29
-; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv v29, 48(r1) # 16-byte Folded Reload
-; CHECK-NEXT: xvcvdpsp v2, vs0
-; CHECK-NEXT: vmrgew v2, v28, v2
-; CHECK-NEXT: lxv v28, 32(r1) # 16-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 96
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghw v2, vs0, v29
+; CHECK-NEXT: lxv v29, 32(r1) # 16-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 80
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index 85ba8ef53416d..21062e35fd13a 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -106,30 +106,14 @@ define double @ldexp_f64(i8 zeroext %x) {
define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; X64-LABEL: ldexp_v2f32:
; X64: # %bb.0:
-; X64-NEXT: subq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 80
+; X64-NEXT: subq $56, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 64
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
-; X64-NEXT: movd %xmm2, %edi
-; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; X64-NEXT: # xmm1 = mem[2,3,2,3]
; X64-NEXT: movd %xmm1, %edi
; X64-NEXT: callq ldexpf@PLT
-; X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
-; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT: movd %xmm0, %edi
-; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; X64-NEXT: # xmm1 = mem[1,1,1,1]
@@ -137,10 +121,8 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; X64-NEXT: # xmm1 = xmm1[0],mem[0]
; X64-NEXT: movaps %xmm1, %xmm0
-; X64-NEXT: addq $72, %rsp
+; X64-NEXT: addq $56, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/powi.ll b/llvm/test/CodeGen/X86/powi.ll
index af710992415ce..7dc6564e62a85 100644
--- a/llvm/test/CodeGen/X86/powi.ll
+++ b/llvm/test/CodeGen/X86/powi.ll
@@ -154,7 +154,101 @@ define double @pow_wrapper_minsize(double %a) minsize {
ret double %ret
}
+define <2 x float> @powi_v2f32(<2 x float> %a) minsize {
+; X86-X87-LABEL: powi_v2f32:
+; X86-X87: # %bb.0:
+; X86-X87-NEXT: pushl %esi
+; X86-X87-NEXT: .cfi_def_cfa_offset 8
+; X86-X87-NEXT: subl $16, %esp
+; X86-X87-NEXT: .cfi_def_cfa_offset 24
+; X86-X87-NEXT: .cfi_offset %esi, -8
+; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
+; X86-X87-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
+; X86-X87-NEXT: pushl $15
+; X86-X87-NEXT: .cfi_adjust_cfa_offset 4
+; X86-X87-NEXT: popl %esi
+; X86-X87-NEXT: .cfi_adjust_cfa_offset -4
+; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-X87-NEXT: fstps (%esp)
+; X86-X87-NEXT: calll __powisf2
+; X86-X87-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-X87-NEXT: fstps (%esp)
+; X86-X87-NEXT: calll __powisf2
+; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-X87-NEXT: fxch %st(1)
+; X86-X87-NEXT: addl $16, %esp
+; X86-X87-NEXT: .cfi_def_cfa_offset 8
+; X86-X87-NEXT: popl %esi
+; X86-X87-NEXT: .cfi_def_cfa_offset 4
+; X86-X87-NEXT: retl
+;
+; X86-SSE-LABEL: powi_v2f32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %esi
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: subl $32, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 40
+; X86-SSE-NEXT: .cfi_offset %esi, -8
+; X86-SSE-NEXT: movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE-NEXT: pushl $15
+; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE-NEXT: popl %esi
+; X86-SSE-NEXT: .cfi_adjust_cfa_offset -4
+; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: calll __powisf2
+; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: calll __powisf2
+; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: addl $32, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: popl %esi
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: powi_v2f32:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: subq $32, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 48
+; X64-NEXT: .cfi_offset %rbx, -16
+; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT: pushq $15
+; X64-NEXT: .cfi_adjust_cfa_offset 8
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_adjust_cfa_offset -8
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq __powisf2@PLT
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq __powisf2@PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %ret = tail call < 2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind ;
+ ret <2 x float> %ret
+}
+
declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
+declare < 2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}