[PATCH] D125988: [x86][SelectionDAG] Unroll vectorized FREM instructions which will be lowered to libcalls
Nabeel Omer via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 23 07:31:02 PDT 2022
n-omer updated this revision to Diff 431367.
n-omer added a comment.
Add context to diff.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D125988/new/
https://reviews.llvm.org/D125988
Files:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/X86/frem-libcall.ll
Index: llvm/test/CodeGen/X86/frem-libcall.ll
===================================================================
--- llvm/test/CodeGen/X86/frem-libcall.ll
+++ llvm/test/CodeGen/X86/frem-libcall.ll
@@ -8,42 +8,26 @@
; CHECK-LABEL: frem:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $80, %rsp
+; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; CHECK-NEXT: callq fmodf@PLT
-; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; CHECK-NEXT: callq fmodf@PLT
-; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1]
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: movlps %xmm1, (%rbx)
-; CHECK-NEXT: addq $80, %rsp
+; CHECK-NEXT: addq $64, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%frem = frem <2 x float> %a0, %a1
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3678,12 +3678,29 @@
Res = WidenVecRes_Binary(N);
break;
+ case ISD::FREM: {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ break;
+ }
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those like
+ // any other binary ops.
+ }
+ LLVM_FALLTHROUGH;
+
case ISD::FADD:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
case ISD::FDIV:
- case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D125988.431367.patch
Type: text/x-patch
Size: 3881 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220523/ac7eb4a4/attachment.bin>
More information about the llvm-commits
mailing list