[llvm] r339536 - [SelectionDAG] In PromoteFloatOp_BITCAST, insert a bitcast after the fp_to_fp16 in case the result type isn't a scalar integer.

Sun Aug 12 23:53:49 PDT 2018

Author: ctopper
Date: Sun Aug 12 23:53:49 2018
New Revision: 339536

URL: http://llvm.org/viewvc/llvm-project?rev=339536&view=rev
Log:
[SelectionDAG] In PromoteFloatOp_BITCAST, insert a bitcast after the fp_to_fp16 in case the result type isn't a scalar integer.

This is another variation of PR38533. In this case, the result type of the bitcast is legal and 16 bits wide, but it is not a scalar integer. So we need to emit the conversion to i16 and then bitcast the result to the true result type. This new bitcast will be further type-legalized if necessary.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
    llvm/trunk/test/CodeGen/X86/pr38533.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp?rev=339536&r1=339535&r2=339536&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp Sun Aug 12 23:53:49 2018
@@ -1778,15 +1778,16 @@ SDValue DAGTypeLegalizer::PromoteFloatOp
   SDValue Op = N->getOperand(0);
   EVT OpVT = Op->getValueType(0);
 
-  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
-  assert (IVT == N->getValueType(0) && "Bitcast to type of different size");
-
   SDValue Promoted = GetPromotedFloat(N->getOperand(0));
   EVT PromotedVT = Promoted->getValueType(0);
 
   // Convert the promoted float value to the desired IVT.
-  return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT,
-                     Promoted);
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+  SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+                                IVT, Promoted);
+  // The final result type might not be an scalar so we need a bitcast. The
+  // bitcast will be further legalized if needed.
+  return DAG.getBitcast(N->getValueType(0), Convert);
 }
 
 // Promote Operand 1 of FCOPYSIGN.  Operand 0 ought to be handled by
@@ -1941,9 +1942,11 @@ void DAGTypeLegalizer::PromoteFloatResul
 SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-  // Input type isn't guaranteed to be i16 so bitcast if not. The bitcast
-  // will be legalized further if necessary.
-  SDValue Cast = DAG.getBitcast(MVT::i16, N->getOperand(0));
+  // Input type isn't guaranteed to be a scalar int so bitcast if not. The
+  // bitcast will be legalized further if necessary.
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+                              N->getOperand(0).getValueType().getSizeInBits());
+  SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
   return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
 }
 

Modified: llvm/trunk/test/CodeGen/X86/pr38533.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr38533.ll?rev=339536&r1=339535&r2=339536&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr38533.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr38533.ll Sun Aug 12 23:53:49 2018
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
 
 ; This test makes sure that a vector that needs to be promoted that is bitcasted to fp16 is legalized correctly without causing a width mismatch.
 define void @constant_fold_vector_to_half() {
@@ -13,18 +14,52 @@ define void @constant_fold_vector_to_hal
 
 ; Similarly this makes sure that the opposite bitcast of the above is also legalized without crashing.
 define void @pr38533_2(half %x) {
-; CHECK-LABEL: pr38533_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    callq __gnu_f2h_ieee
-; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movw %ax, (%rax)
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
+; SSE-LABEL: pr38533_2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    callq __gnu_f2h_ieee
+; SSE-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movw %ax, (%rax)
+; SSE-NEXT:    popq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX512-LABEL: pr38533_2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; AVX512-NEXT:    movw %ax, (%rax)
+; AVX512-NEXT:    retq
   %a = bitcast half %x to <4 x i4>
   store volatile <4 x i4> %a, <4 x i4>* undef
   ret void
 }
+
+; This case is a bitcast from fp16 to a 16-bit wide legal vector type. In this case the result type is legal when the bitcast gets type legalized.
+define void @pr38533_3(half %x) {
+; SSE-LABEL: pr38533_3:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    callq __gnu_f2h_ieee
+; SSE-NEXT:    movw %ax, (%rsp)
+; SSE-NEXT:    movzwl (%rsp), %eax
+; SSE-NEXT:    movw %ax, (%rax)
+; SSE-NEXT:    popq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX512-LABEL: pr38533_3:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    movw %ax, (%rax)
+; AVX512-NEXT:    retq
+  %a = bitcast half %x to <16 x i1>
+  store volatile <16 x i1> %a, <16 x i1>* undef
+  ret void
+}