[llvm] r295940 - [X86][AVX512] Change VCVTSS2SD and VCVTSD2SS node types to keep consistency between VEX/EVEX versions.
Ayman Musa via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 23:24:22 PST 2017
Author: aymanmus
Date: Thu Feb 23 01:24:21 2017
New Revision: 295940
URL: http://llvm.org/viewvc/llvm-project?rev=295940&view=rev
Log:
[X86][AVX512] Change VCVTSS2SD and VCVTSD2SS node types to keep consistency between VEX/EVEX versions.
AVX versions of the converts work on f32/f64 types, while AVX512 version work on vectors.
Differential Revision: https://reviews.llvm.org/D29988
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=295940&r1=295939&r2=295940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Feb 23 01:24:21 2017
@@ -259,6 +259,23 @@ multiclass AVX512_maskable_common<bits<8
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
+// Similar to AVX512_maskable_common, but with scalar types.
+multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ SDNode Select = vselect,
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [], [], [],
+ MaskingConstraint, NoItinerary, IsCommutable,
+ IsKCommutable>;
+
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// perserved vector elements come from a new dummy input operand tied to $dst.
@@ -291,6 +308,18 @@ multiclass AVX512_maskable_scalar<bits<8
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
X86selects, "$src0 = $dst", itin, IsCommutable>;
+// Similar to AVX512_maskable_scalar, but with scalar types.
+multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_fp_common<O, F, _, Outs, Ins,
+ !con((ins _.FRC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
+ X86selects, "$src0 = $dst", itin, IsCommutable>;
+
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@@ -6030,27 +6059,40 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
- defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ let isCodeGenOnly = 1 in {
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
- defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT (scalar_to_vector
(_Src.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ }
+
+ defm rr : AVX512_maskable_fp_scalar<opc, MRMSrcReg, _, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2">,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable_fp_scalar<opc, MRMSrcMem, _, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2">,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+
}
// Scalar Coversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6062,7 +6104,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6095,39 +6137,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
- (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
- (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
- (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
+ (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=295940&r1=295939&r2=295940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Feb 23 01:24:21 2017
@@ -1851,6 +1851,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
+ { X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
+ { X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
{ X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
@@ -8165,11 +8169,15 @@ static bool hasUndefRegUpdate(unsigned O
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
- case X86::VCVTSD2SSZrrb:
+ case X86::VCVTSD2SSZrr_Int:
+ case X86::VCVTSD2SSZrrb_Int:
case X86::VCVTSD2SSZrm:
+ case X86::VCVTSD2SSZrm_Int:
case X86::VCVTSS2SDZrr:
- case X86::VCVTSS2SDZrrb:
+ case X86::VCVTSS2SDZrr_Int:
+ case X86::VCVTSS2SDZrrb_Int:
case X86::VCVTSS2SDZrm:
+ case X86::VCVTSS2SDZrm_Int:
case X86::VRNDSCALESDr:
case X86::VRNDSCALESDrb:
case X86::VRNDSCALESDm:
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=295940&r1=295939&r2=295940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Feb 23 01:24:21 2017
@@ -1716,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $ds
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
- (ins FR64:$src1, FR64:$src2),
+ (ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
- (ins FR64:$src1, f64mem:$src2),
+ (ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
-def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)),
+ (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1781,14 +1782,14 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
- (ins FR32:$src1, FR32:$src2),
+ (ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
- (ins FR32:$src1, f32mem:$src2),
+ (ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
@@ -1796,15 +1797,15 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (ou
}
def : Pat<(f64 (fpextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=295940&r1=295939&r2=295940&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Thu Feb 23 01:24:21 2017
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
;
; Half to Float
@@ -1941,25 +1941,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX1-LABEL: cvt_8i16_to_8f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %xmm0, %rdx
-; AVX1-NEXT: movq %rdx, %r9
+; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movl %edx, %r10d
-; AVX1-NEXT: movswl %dx, %r8d
+; AVX1-NEXT: movswl %dx, %r9d
; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: shrq $32, %r9
+; AVX1-NEXT: shrq $32, %r8
; AVX1-NEXT: shrl $16, %r10d
; AVX1-NEXT: vpextrq $1, %xmm0, %rdi
-; AVX1-NEXT: movq %rdi, %rsi
-; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: movq %rdi, %rax
+; AVX1-NEXT: movl %edi, %esi
; AVX1-NEXT: movswl %di, %ecx
; AVX1-NEXT: shrq $48, %rdi
-; AVX1-NEXT: shrq $32, %rsi
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: shrq $32, %rax
+; AVX1-NEXT: shrl $16, %esi
+; AVX1-NEXT: movswl %si, %esi
+; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm1
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm2
-; AVX1-NEXT: movswl %si, %eax
+; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm3
; AVX1-NEXT: movswl %di, %eax
@@ -1968,9 +1968,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX1-NEXT: movswl %r10w, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT: vmovd %r8d, %xmm5
+; AVX1-NEXT: vmovd %r9d, %xmm5
; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5
-; AVX1-NEXT: movswl %r9w, %eax
+; AVX1-NEXT: movswl %r8w, %eax
; AVX1-NEXT: vmovd %eax, %xmm6
; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6
; AVX1-NEXT: movswl %dx, %eax
@@ -1995,25 +1995,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX2-LABEL: cvt_8i16_to_8f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %xmm0, %rdx
-; AVX2-NEXT: movq %rdx, %r9
+; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movl %edx, %r10d
-; AVX2-NEXT: movswl %dx, %r8d
+; AVX2-NEXT: movswl %dx, %r9d
; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: shrq $32, %r9
+; AVX2-NEXT: shrq $32, %r8
; AVX2-NEXT: shrl $16, %r10d
; AVX2-NEXT: vpextrq $1, %xmm0, %rdi
-; AVX2-NEXT: movq %rdi, %rsi
-; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: movl %edi, %esi
; AVX2-NEXT: movswl %di, %ecx
; AVX2-NEXT: shrq $48, %rdi
-; AVX2-NEXT: shrq $32, %rsi
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: cwtl
-; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: shrq $32, %rax
+; AVX2-NEXT: shrl $16, %esi
+; AVX2-NEXT: movswl %si, %esi
+; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
-; AVX2-NEXT: movswl %si, %eax
+; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3
; AVX2-NEXT: movswl %di, %eax
@@ -2022,9 +2022,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX2-NEXT: movswl %r10w, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT: vmovd %r8d, %xmm5
+; AVX2-NEXT: vmovd %r9d, %xmm5
; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5
-; AVX2-NEXT: movswl %r9w, %eax
+; AVX2-NEXT: movswl %r8w, %eax
; AVX2-NEXT: vmovd %eax, %xmm6
; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6
; AVX2-NEXT: movswl %dx, %eax
More information about the llvm-commits
mailing list