[clang] 5184dc2 - [Clang][X86] Change X86 cast intrinsics to use __builtin_nondeterministic_value

via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 17 04:59:22 PDT 2023


Author: ManuelJBrito
Date: 2023-04-17T12:58:36+01:00
New Revision: 5184dc2d7cce5971f4386c6f99d04f87233dd313

URL: https://github.com/llvm/llvm-project/commit/5184dc2d7cce5971f4386c6f99d04f87233dd313
DIFF: https://github.com/llvm/llvm-project/commit/5184dc2d7cce5971f4386c6f99d04f87233dd313.diff

LOG: [Clang][X86] Change X86 cast intrinsics to use __builtin_nondeterministic_value

The following intrinsics are currently implemented using a shufflevector with
an undefined mask, this is however incorrect according to intel's semantics for
undefined value which expect an unknown but consistent value.

With __builtin_nondeterministic_value we can now match intel's undefined value.

Differential Revision: https://reviews.llvm.org/D143287

Added: 
    clang/test/CodeGen/X86/avx-cast-builtins.c

Modified: 
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512fp16intrin.h
    clang/lib/Headers/avxintrin.h
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/avx512f-builtins.c
    clang/test/CodeGen/X86/avx512fp16-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index b19d2fb90ff58..88a8cebbee301 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -397,14 +397,15 @@ _mm512_broadcastsd_pd(__m128d __A)
 static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd256_pd512(__m256d __a)
 {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
+                                 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps256_ps512(__m256 __a)
 {
-  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
-                                          -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
 static __inline __m128d __DEFAULT_FN_ATTRS512
@@ -446,7 +447,10 @@ _mm512_castpd_si512 (__m512d __A)
 static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd128_pd512 (__m128d __A)
 {
-  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+  __m256d __B = __builtin_nondeterministic_value(__B);
+  return __builtin_shufflevector(
+      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
+      __B, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS512
@@ -464,19 +468,25 @@ _mm512_castps_si512 (__m512 __A)
 static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps128_ps512 (__m128 __A)
 {
-    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  __m256 __B = __builtin_nondeterministic_value(__B);
+  return __builtin_shufflevector(
+      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
+      __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi128_si512 (__m128i __A)
 {
-   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+  __m256i __B = __builtin_nondeterministic_value(__B);
+  return __builtin_shufflevector(
+      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
+      __B, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi256_si512 (__m256i __A)
 {
-   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
+   return  __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512

diff  --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 5cdc37fde629f..d326586578bb3 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -192,22 +192,26 @@ _mm512_castph512_ph256(__m512h __a) {
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_castph128_ph256(__m128h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
-                                 -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_castph128_ph512(__m128h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  __m256h __b = __builtin_nondeterministic_value(__b);
+  return __builtin_shufflevector(
+      __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+      __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+      20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_castph256_ph512(__m256h __a) {
-  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
-                                 -1, -1, -1, -1, -1, -1, -1, -1);
+  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                                 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+                                 27, 28, 29, 30, 31);
 }
 
 /// Constructs a 256-bit floating-point vector of [16 x half] from a

diff  --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index ee31569c16234..bd119220559e4 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4499,7 +4499,8 @@ _mm256_castsi256_si128(__m256i __a)
 static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_castpd128_pd256(__m128d __a)
 {
-  return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);
+  return __builtin_shufflevector(
+      (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
 }
 
 /// Constructs a 256-bit floating-point vector of [8 x float] from a
@@ -4520,7 +4521,9 @@ _mm256_castpd128_pd256(__m128d __a)
 static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_castps128_ps256(__m128 __a)
 {
-  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);
+  return __builtin_shufflevector((__v4sf)__a,
+                                 (__v4sf)__builtin_nondeterministic_value(__a),
+                                 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -4539,7 +4542,8 @@ _mm256_castps128_ps256(__m128 __a)
 static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_castsi128_si256(__m128i __a)
 {
-  return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);
+  return __builtin_shufflevector(
+      (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
 }
 
 /// Constructs a 256-bit floating-point vector of [4 x double] from a

diff  --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index e76855906381d..761ab6e9eb2cb 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -143,7 +143,8 @@ __m256i test_mm256_castpd_si256(__m256d A) {
 
 __m256d test_mm256_castpd128_pd256(__m128d A) {
   // CHECK-LABEL: test_mm256_castpd128_pd256
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <2 x double> poison
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_castpd128_pd256(A);
 }
 
@@ -165,7 +166,8 @@ __m256i test_mm256_castps_si256(__m256 A) {
 
 __m256 test_mm256_castps128_ps256(__m128 A) {
   // CHECK-LABEL: test_mm256_castps128_ps256
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <4 x float> poison
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_castps128_ps256(A);
 }
 
@@ -177,7 +179,8 @@ __m128 test_mm256_castps256_ps128(__m256 A) {
 
 __m256i test_mm256_castsi128_si256(__m128i A) {
   // CHECK-LABEL: test_mm256_castsi128_si256
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <2 x i64> poison
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_castsi128_si256(A);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx-cast-builtins.c b/clang/test/CodeGen/X86/avx-cast-builtins.c
new file mode 100644
index 0000000000000..8b941c4287b9a
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx-cast-builtins.c
@@ -0,0 +1,101 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -O3 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f  -target-feature +avx512fp16 -S -o - | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m256d test_mm256_castpd128_pd256(__m128d A) {
+  // CHECK-LABEL: test_mm256_castpd128_pd256
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm256_castpd128_pd256(A);
+}
+
+__m256 test_mm256_castps128_ps256(__m128 A) {
+  // CHECK-LABEL: test_mm256_castps128_ps256
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm256_castps128_ps256(A);
+}
+
+__m256i test_mm256_castsi128_si256(__m128i A) {
+  // CHECK-LABEL: test_mm256_castsi128_si256
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm256_castsi128_si256(A);
+}
+
+__m256h test_mm256_castph128_ph256(__m128h A) {
+  // CHECK-LABEL: test_mm256_castph128_ph256
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm256_castph128_ph256(A);
+}
+
+__m512h test_mm512_castph128_ph512(__m128h A) {
+  // CHECK-LABEL: test_mm512_castph128_ph512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castph128_ph512(A);
+}
+
+__m512h test_mm512_castph256_ph512(__m256h A) {
+  // CHECK-LABEL: test_mm512_castph256_ph512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castph256_ph512(A);
+}
+
+__m512d test_mm512_castpd256_pd512(__m256d A){
+  // CHECK-LABEL: test_mm512_castpd256_pd512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castpd256_pd512(A);
+}
+
+__m512 test_mm512_castps256_ps512(__m256 A){
+  // CHECK-LABEL: test_mm512_castps256_ps512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castps256_ps512(A);
+}
+
+__m512d test_mm512_castpd128_pd512(__m128d A){
+  // CHECK-LABEL: test_mm512_castpd128_pd512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castpd128_pd512(A);
+}
+
+__m512 test_mm512_castps128_ps512(__m128 A){
+  // CHECK-LABEL: test_mm512_castps128_ps512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castps128_ps512(A);
+}
+
+__m512i test_mm512_castsi128_si512(__m128i A){
+  // CHECK-LABEL: test_mm512_castsi128_si512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castsi128_si512(A);
+}
+
+__m512i test_mm512_castsi256_si512(__m256i A){
+  // CHECK-LABEL: test_mm512_castsi256_si512
+  // CHECK:         # %bb.0:
+  // CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+  // CHECK-NEXT:    ret{{[l|q]}}
+  return _mm512_castsi256_si512(A);
+}

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index dfafcf812fe24..a661c52359503 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -8987,13 +8987,23 @@ __m512i test_mm512_castpd_si512 (__m512d __A)
 
 __m512 test_mm512_castps128_ps512(__m128 __A) {
   // CHECK-LABEL: @test_mm512_castps128_ps512
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[B:%.*]] = freeze <8 x float> poison
+  // CHECK: store <8 x float> [[B]], ptr [[BA:%.*]]
+  // CHECK: [[A:%.*]] = freeze <4 x float> poison 
+  // CHECK: [[SV:%.*]] = shufflevector <4 x float> %{{.*}}, <4 x float> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: [[C:%.*]] = load <8 x float>, ptr [[BA]]
+  // CHECK: shufflevector <8 x float> [[SV]], <8 x float> [[C]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_castps128_ps512(__A); 
 }
 
 __m512d test_mm512_castpd128_pd512(__m128d __A) {
   // CHECK-LABEL: @test_mm512_castpd128_pd512
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[B:%.*]] = freeze <4 x double> poison
+  // CHECK: store <4 x double> [[B]], ptr [[BA:%.*]]
+  // CHECK: [[A:%.*]] = freeze <2 x double> poison 
+  // CHECK: [[SV:%.*]] = shufflevector <2 x double> %{{.*}}, <2 x double> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: [[C:%.*]] = load <4 x double>, ptr [[BA]]
+  // CHECK: shufflevector <4 x double> [[SV]], <4 x double> [[C]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_castpd128_pd512(__A); 
 }
 
@@ -9086,7 +9096,8 @@ __m512d test_mm512_setr4_pd(double e0, double e1, double e2, double e3)
 __m512d test_mm512_castpd256_pd512(__m256d a)
 {
   // CHECK-LABEL: @test_mm512_castpd256_pd512
-  // CHECK: shufflevector <4 x double> {{.*}} <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <4 x double> poison 
+  // CHECK: shufflevector <4 x double> %{{.}}, <4 x double> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_castpd256_pd512(a);
 }
 
@@ -9112,13 +9123,19 @@ __m512i test_mm512_castps_si512 (__m512 __A)
 }
 __m512i test_mm512_castsi128_si512(__m128i __A) {
   // CHECK-LABEL: @test_mm512_castsi128_si512
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[B:%.*]] = freeze <4 x i64> poison
+  // CHECK: store <4 x i64> [[B]], ptr [[BA:%.*]]
+  // CHECK: [[A:%.*]] = freeze <2 x i64> poison 
+  // CHECK: [[SV:%.*]] = shufflevector <2 x i64> %{{.*}}, <2 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: [[C:%.*]] = load <4 x i64>, ptr [[BA]]
+  // CHECK: shufflevector <4 x i64> [[SV]], <4 x i64> [[C]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_castsi128_si512(__A); 
 }
 
 __m512i test_mm512_castsi256_si512(__m256i __A) {
   // CHECK-LABEL: @test_mm512_castsi256_si512
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <4 x i64> poison 
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_castsi256_si512(__A); 
 }
 

diff  --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index f1049b36f3cec..23bd0d3df3f2f 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -325,19 +325,26 @@ __m256h test_mm512_castph512_ph256(__m512h __a) {
 
 __m256h test_mm256_castph128_ph256(__m128h __a) {
   // CHECK-LABEL: test_mm256_castph128_ph256
-  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <8 x half> poison 
+  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_castph128_ph256(__a);
 }
 
 __m512h test_mm512_castph128_ph512(__m128h __a) {
   // CHECK-LABEL: test_mm512_castph128_ph512
-  // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[B:%.*]] = freeze <16 x half> poison
+  // CHECK: store <16 x half> [[B]], ptr [[BA:%.*]]
+  // CHECK: [[A:%.*]] = freeze <8 x half> poison
+  // CHECK: [[SV:%.*]] = shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: [[C:%.*]] = load <16 x half>, ptr [[BA]]
+  // CHECK: shufflevector <16 x half> [[SV]], <16 x half> [[C]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   return _mm512_castph128_ph512(__a);
 }
 
 __m512h test_mm512_castph256_ph512(__m256h __a) {
   // CHECK-LABEL: test_mm512_castph256_ph512
-  // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: [[A:%.*]] = freeze <16 x half> poison 
+  // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> [[A]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   return _mm512_castph256_ph512(__a);
 }
 


        


More information about the cfe-commits mailing list