[llvm] r337055 - [X86][FastISel] Support uitofp with avx512.

Fri Jul 13 15:09:30 PDT 2018

Author: ctopper
Date: Fri Jul 13 15:09:30 2018
New Revision: 337055

URL: http://llvm.org/viewvc/llvm-project?rev=337055&view=rev
Log:
[X86][FastISel] Support uitofp with avx512.

Added:
    llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
Modified:
    llvm/trunk/lib/Target/X86/X86FastISel.cpp

Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=337055&r1=337054&r2=337055&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Fri Jul 13 15:09:30 2018
@@ -134,6 +134,8 @@ private:
   bool X86SelectFPExt(const Instruction *I);
   bool X86SelectFPTrunc(const Instruction *I);
   bool X86SelectSIToFP(const Instruction *I);
+  bool X86SelectUIToFP(const Instruction *I);
+  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
 
   const X86InstrInfo *getInstrInfo() const {
     return Subtarget->getInstrInfo();
@@ -2410,11 +2412,14 @@ bool X86FastISel::X86SelectSelect(const
   return false;
 }
 
-bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+// Common code for X86SelectSIToFP and X86SelectUIToFP.
+bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
   // The target-independent selection algorithm in FastISel already knows how
   // to select a SINT_TO_FP if the target is SSE but not AVX.
   // Early exit if the subtarget doesn't have AVX.
-  if (!Subtarget->hasAVX())
+  // Unsigned conversion requires avx512.
+  bool HasAVX512 = Subtarget->hasAVX512();
+  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
     return false;
 
   // TODO: We could sign extend narrower types.
@@ -2429,21 +2434,24 @@ bool X86FastISel::X86SelectSIToFP(const
 
   unsigned Opcode;
 
-  static const uint16_t CvtOpc[2][2][2] = {
+  static const uint16_t SCvtOpc[2][2][2] = {
     { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
       { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
     { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
       { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
   };
-  bool HasAVX512 = Subtarget->hasAVX512();
+  static const uint16_t UCvtOpc[2][2] = {
+    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
+    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
+  };
   bool Is64Bit = SrcVT == MVT::i64;
 
   if (I->getType()->isDoubleTy()) {
-    // sitofp int -> double
-    Opcode = CvtOpc[HasAVX512][1][Is64Bit];
+    // s/uitofp int -> double
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
   } else if (I->getType()->isFloatTy()) {
-    // sitofp int -> float
-    Opcode = CvtOpc[HasAVX512][0][Is64Bit];
+    // s/uitofp int -> float
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
   } else
     return false;
 
@@ -2458,6 +2466,14 @@ bool X86FastISel::X86SelectSIToFP(const
   return true;
 }
 
+bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned*/true);
+}
+
+bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned*/false);
+}
+
 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                           unsigned TargetOpc,
@@ -3632,6 +3648,8 @@ X86FastISel::fastSelectInstruction(const
     return X86SelectFPTrunc(I);
   case Instruction::SIToFP:
     return X86SelectSIToFP(I);
+  case Instruction::UIToFP:
+    return X86SelectUIToFP(I);
   case Instruction::IntToPtr: // Deliberate fall-through.
   case Instruction::PtrToInt: {
     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());

Added: llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll?rev=337055&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll Fri Jul 13 15:09:30 2018
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+
+define double @long_to_double_rr(i64 %a) {
+; ALL-LABEL: long_to_double_rr:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2sdq %rdi, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = uitofp i64 %a to double
+  ret double %0
+}
+
+define double @long_to_double_rm(i64* %a) {
+; ALL-LABEL: long_to_double_rm:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movq (%rdi), %rax
+; ALL-NEXT:    vcvtusi2sdq %rax, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; ALL-LABEL: long_to_double_rm_optsize:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2sdq (%rdi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define float @long_to_float_rr(i64 %a) {
+; ALL-LABEL: long_to_float_rr:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2ssq %rdi, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = uitofp i64 %a to float
+  ret float %0
+}
+
+define float @long_to_float_rm(i64* %a) {
+; ALL-LABEL: long_to_float_rm:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movq (%rdi), %rax
+; ALL-NEXT:    vcvtusi2ssq %rax, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; ALL-LABEL: long_to_float_rm_optsize:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2ssq (%rdi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}

Added: llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll?rev=337055&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll Fri Jul 13 15:09:30 2018
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86
+
+
+define double @int_to_double_rr(i32 %a) {
+; AVX-LABEL: int_to_double_rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rr:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = uitofp i32 %a to double
+  ret double %0
+}
+
+define double @int_to_double_rm(i32* %a) {
+; AVX-LABEL: int_to_double_rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rm:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define float @int_to_float_rr(i32 %a) {
+; AVX-LABEL: int_to_float_rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rr:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = uitofp i32 %a to float
+  ret float %0
+}
+
+define float @int_to_float_rm(i32* %a) {
+; AVX-LABEL: int_to_float_rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rm:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to float
+  ret float %1
+}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to float
+  ret float %1
+}