[llvm] r364838 - [X86] Correct v4f32->v2i64 cvt(t)ps2(u)qq memory isel patterns
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 12:01:37 PDT 2019
Author: ctopper
Date: Mon Jul 1 12:01:37 2019
New Revision: 364838
URL: http://llvm.org/viewvc/llvm-project?rev=364838&view=rev
Log:
[X86] Correct v4f32->v2i64 cvt(t)ps2(u)qq memory isel patterns
These instructions only read 64 bits of memory, so we shouldn't
allow a full vector width load to be pattern matched, in case it
is marked volatile. Instead, allow vzload or scalar_to_vector+load.
Also add a DAG combine to turn full vector loads into vzload when
used by one of these instructions, provided the load isn't volatile.
This fixes another case of PR42079.
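For reference, a minimal IR sketch of the shape this change improves
(it mirrors the fptosi_2f32_to_2i64_load test updated below):

  ; With avx512dq+avx512vl the 64-bit load now folds into the conversion,
  ; giving "vcvttps2qq (%rdi), %xmm0" instead of a separate vmovsd plus
  ; the register form of vcvttps2qq.
  define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
    %a = load <2 x float>, <2 x float>* %x
    %b = fptosi <2 x float> %a to <2 x i64>
    ret <2 x i64> %b
  }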
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=364838&r1=364837&r2=364838&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jul 1 12:01:37 2019
@@ -41144,6 +41144,41 @@ static SDValue combineX86INT_TO_FP(SDNod
return SDValue();
}
+static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // Convert a full vector load into vzload when not all bits are needed.
+ SDValue In = N->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+ if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+ ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+ assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ // Unless the load is volatile.
+ if (!LN->isVolatile()) {
+ SDLoc dl(N);
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getFloatingPointVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+ DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -43940,6 +43975,10 @@ SDValue X86TargetLowering::PerformDAGCom
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
+ case X86ISD::CVTP2SI:
+ case X86ISD::CVTP2UI:
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
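To make the guards in combineCVTP2I_CVTTP2I concrete: the combine only
fires for a normal, non-volatile load with a single use. A hypothetical
example (not one of the committed tests; the function name is made up)
where it must stay off:

  ; The full 128-bit volatile load has to be performed as written;
  ; narrowing it to a 64-bit vzload would shrink the memory access,
  ; so the combine bails out on LN->isVolatile() and the register
  ; form of the conversion is used instead.
  define <2 x i64> @volatile_load_not_narrowed(<4 x float>* %p) {
    %v = load volatile <4 x float>, <4 x float>* %p
    %r = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %v,
                                           <2 x i64> zeroinitializer, i8 -1)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)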
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=364838&r1=364837&r2=364838&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jul 1 12:01:37 2019
@@ -7979,7 +7979,11 @@ multiclass avx512_cvtps2qq<bits<8> opc,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -7997,7 +8001,11 @@ multiclass avx512_cvttps2qq<bits<8> opc,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -8358,6 +8366,50 @@ let Predicates = [HasDQI] in {
}
let Predicates = [HasDQI, HasVLX] in {
+ def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
(VCVTTPS2QQZ256rr VR128X:$src)>;
def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll?rev=364838&r1=364837&r2=364838&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll Mon Jul 1 12:01:37 2019
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
@@ -126,16 +126,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -147,18 +143,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -170,18 +162,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -194,16 +182,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -215,18 +199,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -238,18 +218,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0]
+; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -361,16 +337,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -382,18 +354,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -405,18 +373,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -428,16 +392,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -449,18 +409,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -472,18 +428,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0]
+; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -766,16 +718,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -787,18 +735,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -810,18 +754,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -834,16 +774,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -855,18 +791,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -878,18 +810,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1078,16 +1006,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1099,18 +1023,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1122,18 +1042,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1146,16 +1062,12 @@ define <2 x i64> @test_int_x86_avx512_cv
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1167,18 +1079,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
-; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f]
-; X64-NEXT: # xmm1 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1190,18 +1098,14 @@ define <2 x i64> @test_int_x86_avx512_ma
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
-; X64-NEXT: # xmm0 = mem[0],zero
-; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll?rev=364838&r1=364837&r2=364838&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll Mon Jul 1 12:01:37 2019
@@ -2698,8 +2698,7 @@ define <2 x i64> @fptosi_2f32_to_2i64_lo
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
%a = load <2 x float>, <2 x float>* %x
%b = fptosi <2 x float> %a to <2 x i64>
@@ -2787,8 +2786,7 @@ define <2 x i64> @fptoui_2f32_to_2i64_lo
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
%a = load <2 x float>, <2 x float>* %x
%b = fptoui <2 x float> %a to <2 x i64>
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=364838&r1=364837&r2=364838&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Mon Jul 1 12:01:37 2019
@@ -2980,8 +2980,7 @@ define <2 x i64> @fptosi_2f32_to_2i64_lo
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
%a = load <2 x float>, <2 x float>* %x
%b = fptosi <2 x float> %a to <2 x i64>
@@ -3069,8 +3068,7 @@ define <2 x i64> @fptoui_2f32_to_2i64_lo
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0
; AVX512VLDQ-NEXT: retq
%a = load <2 x float>, <2 x float>* %x
%b = fptoui <2 x float> %a to <2 x i64>