[llvm] 2aecffc - [CostModel][X86] Find AVX conversion costs using legalized types if custom types didn't match

Fri Jul 2 05:50:05 PDT 2021

Author: Simon Pilgrim
Date: 2021-07-02T13:49:31+01:00
New Revision: 2aecffcd40de99bd3155f57e9c91fe57c3207d39

URL: https://github.com/llvm/llvm-project/commit/2aecffcd40de99bd3155f57e9c91fe57c3207d39
DIFF: https://github.com/llvm/llvm-project/commit/2aecffcd40de99bd3155f57e9c91fe57c3207d39.diff

LOG: [CostModel][X86] Find AVX conversion costs using legalized types if custom types didn't match

Building on rG2a1ef8784ad9a, fallback to attempting to match against legalized types like we do for SSE targets.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/fptosi.ll
    llvm/test/Analysis/CostModel/X86/sitofp.ll
    llvm/test/Analysis/CostModel/X86/uitofp.ll
    llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 265c4d7bcf71..d55cd8a8c7a8 100644

--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1955,6 +1955,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i32, 2 },
     { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
     { ISD::SINT_TO_FP,  MVT::v8f64, MVT::v8i32, 4 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v2i64, 5 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i64, 8 },
 
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1,  7 },
     { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i1,  7 },
@@ -1966,6 +1968,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i16, 2 },
     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 4 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i64, 10 },
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 5 },
     { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i32, 6 },
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i64, 18 },
@@ -2242,17 +2245,58 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
   }
 
   // Fall back to legalized types.
-  // TODO: Add AVX support.
   std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
   std::pair<InstructionCost, MVT> LTDest =
       TLI->getTypeLegalizationCost(DL, Dst);
 
-  if (ST->hasSSE41() && !ST->hasAVX())
+  if (ST->useAVX512Regs()) {
+    if (ST->hasBWI())
+      if (const auto *Entry = ConvertCostTableLookup(
+              AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
+        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+    if (ST->hasDQI())
+      if (const auto *Entry = ConvertCostTableLookup(
+              AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
+        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+    if (ST->hasAVX512())
+      if (const auto *Entry = ConvertCostTableLookup(
+              AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
+        return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+  }
+
+  if (ST->hasBWI())
+    if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+  if (ST->hasDQI())
+    if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+  if (ST->hasAVX512())
+    if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+  if (ST->hasAVX2())
+    if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+  if (ST->hasAVX())
+    if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
+
+  if (ST->hasSSE41())
     if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
                                                    LTDest.second, LTSrc.second))
       return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
 
-  if (ST->hasSSE2() && !ST->hasAVX())
+  if (ST->hasSSE2())
     if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
                                                    LTDest.second, LTSrc.second))
       return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

diff  --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll
index fd84c5eafdae..d92310cb79f8 100644
--- a/llvm/test/Analysis/CostModel/X86/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll
@@ -239,7 +239,7 @@ define i32 @fptosi_float_i32(i32 %arg) {
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'fptosi_float_i32'

diff  --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll
index 67d366362455..b3c400c93b9f 100644
--- a/llvm/test/Analysis/CostModel/X86/sitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll
@@ -85,21 +85,21 @@ define i32 @sitofp_i32_double() {
 ;
 ; AVX1-LABEL: 'sitofp_i32_double'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sitofp_i32_double'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'sitofp_i32_double'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -158,7 +158,7 @@ define i32 @sitofp_i8_float() {
 ;
 ; AVX-LABEL: 'sitofp_i8_float'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
@@ -166,7 +166,7 @@ define i32 @sitofp_i8_float() {
 ;
 ; AVX512-LABEL: 'sitofp_i8_float'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
@@ -191,7 +191,7 @@ define i32 @sitofp_i16_float() {
 ;
 ; AVX-LABEL: 'sitofp_i16_float'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
@@ -199,7 +199,7 @@ define i32 @sitofp_i16_float() {
 ;
 ; AVX512-LABEL: 'sitofp_i16_float'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
@@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() {
 ;
 ; AVX1-LABEL: 'sitofp_i32_float'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() {
 ;
 ; AVX2-LABEL: 'sitofp_i32_float'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() {
 ;
 ; AVX512-LABEL: 'sitofp_i32_float'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@@ -273,16 +273,16 @@ define i32 @sitofp_i64_float() {
 ;
 ; AVX-LABEL: 'sitofp_i64_float'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sitofp_i64_float'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef

diff  --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll
index 56cac09a5b45..db092ab20f35 100644
--- a/llvm/test/Analysis/CostModel/X86/uitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll
@@ -224,7 +224,7 @@ define i32 @uitofp_i32_float() {
 ;
 ; AVX1-LABEL: 'uitofp_i32_float'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
index 73710e2d8888..2e72351858fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
@@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
 }
 
 define void @sitofp_2i32_2f64() #0 {
-; CHECK-LABEL: @sitofp_2i32_2f64(
-; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
-; CHECK-NEXT:    [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
-; CHECK-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT:    ret void
+; SSE-LABEL: @sitofp_2i32_2f64(
+; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
+; SSE-NEXT:    [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
+; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    ret void
+;
+; AVX-LABEL: @sitofp_2i32_2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
+; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
   %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
@@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
 ; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
-; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
-; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
-; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    ret void
-;
-; AVX512-LABEL: @sitofp_4i64_4f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
-; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; AVX512-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; AVX512-NEXT:    ret void
-;
-; AVX256DQ-LABEL: @sitofp_4i64_4f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
-; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; AVX256DQ-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; AVX256DQ-NEXT:    ret void
+; AVX-LABEL: @sitofp_4i64_4f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
+; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
   %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
 ; SSE-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
 ; SSE-NEXT:    ret void
 ;
-; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
-; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
-; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
-; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
-; AVX256NODQ-NEXT:    [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
-; AVX256NODQ-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
-; AVX256NODQ-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
-; AVX256NODQ-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-; AVX256NODQ-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; AVX256NODQ-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; AVX256NODQ-NEXT:    ret void
-;
-; AVX512-LABEL: @sitofp_8i64_8f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
-; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX512-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX512-NEXT:    ret void
-;
-; AVX256DQ-LABEL: @sitofp_8i64_8f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
-; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX256DQ-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256DQ-NEXT:    ret void
+; AVX-LABEL: @sitofp_8i64_8f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
+; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
   %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
index fc90662061d5..2109520db81d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
@@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
 }
 
 define void @sitofp_2i32_2f64() #0 {
-; CHECK-LABEL: @sitofp_2i32_2f64(
-; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
-; CHECK-NEXT:    [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
-; CHECK-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT:    ret void
+; SSE-LABEL: @sitofp_2i32_2f64(
+; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
+; SSE-NEXT:    [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
+; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    ret void
+;
+; AVX-LABEL: @sitofp_2i32_2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
+; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
   %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
@@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
 ; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
-; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
-; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
-; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    ret void
-;
-; AVX512-LABEL: @sitofp_4i64_4f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
-; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; AVX512-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; AVX512-NEXT:    ret void
-;
-; AVX256DQ-LABEL: @sitofp_4i64_4f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
-; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; AVX256DQ-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; AVX256DQ-NEXT:    ret void
+; AVX-LABEL: @sitofp_4i64_4f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
+; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
   %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
 ; SSE-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
 ; SSE-NEXT:    ret void
 ;
-; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
-; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
-; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
-; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
-; AVX256NODQ-NEXT:    [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
-; AVX256NODQ-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
-; AVX256NODQ-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
-; AVX256NODQ-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-; AVX256NODQ-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; AVX256NODQ-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; AVX256NODQ-NEXT:    ret void
-;
-; AVX512-LABEL: @sitofp_8i64_8f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
-; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX512-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX512-NEXT:    ret void
-;
-; AVX256DQ-LABEL: @sitofp_8i64_8f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
-; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX256DQ-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256DQ-NEXT:    ret void
+; AVX-LABEL: @sitofp_8i64_8f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
+; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    ret void
 ;
   %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
   %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8