r249097 - Fix the SSE4 byte sign extension in a cleaner way, and more thoroughly

Chandler Carruth via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 1 16:40:12 PDT 2015


Author: chandlerc
Date: Thu Oct  1 18:40:12 2015
New Revision: 249097

URL: http://llvm.org/viewvc/llvm-project?rev=249097&view=rev
Log:
Fix the SSE4 byte sign extension in a cleaner way, and more thoroughly
test that our intrinsics behave the same under -fsigned-char and
-funsigned-char.

This further testing uncovered that AVX-2 has a broken cmpgt for 8-bit
elements, and has for a long time. This is fixed in the same way as
SSE4 handles the case.

The other ISA extensions currently work correctly because they use
specific instruction intrinsics. As soon as they are rewritten in terms
of generic IR, they will need to add these special casts. I've added the
necessary testing to catch this however, so we shouldn't have to chase
it down again.

I considered changing the core typedef to be signed, but that seems like
a bad idea. Notably, it would be an ABI break if anyone is reaching into
the innards of the intrinsic headers and passing __v16qi on an API
boundary. I can't be completely confident that this wouldn't happen due
to a macro expanding in a lambda, etc., so it seems much better to leave
it alone. It also matches GCC's behavior exactly.

A fun side note is that for both GCC and Clang, -funsigned-char really
does change the semantics of __v16qi. To observe this, consider:

  % cat x.cc
  #include <smmintrin.h>
  #include <iostream>

  int main() {
    __v16qi a = { 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    __v16qi b = _mm_set1_epi8(-1);
    std::cout << (int)(a / b)[0] << ", " << (int)(a / b)[1] << '\n';
  }
  % clang++ -o x x.cc && ./x
  -1, 1
  % clang++ -funsigned-char -o x x.cc && ./x
  0, 1

However, while this may be surprising, both Clang and GCC agree.

Differential Revision: http://reviews.llvm.org/D13324

Modified:
    cfe/trunk/lib/Headers/avx2intrin.h
    cfe/trunk/lib/Headers/avxintrin.h
    cfe/trunk/lib/Headers/emmintrin.h
    cfe/trunk/lib/Headers/smmintrin.h
    cfe/trunk/test/CodeGen/avx2-builtins.c
    cfe/trunk/test/CodeGen/avx512bw-builtins.c
    cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
    cfe/trunk/test/CodeGen/mmx-builtins.c
    cfe/trunk/test/CodeGen/sse42-builtins.c

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Thu Oct  1 18:40:12 2015
@@ -208,7 +208,9 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)((__v32qi)__a > (__v32qi)__b);
+  /* This function always performs a signed comparison, but __v32qi is a char
+     which may be signed or unsigned, so use __v32qs. */
+  return (__m256i)((__v32qs)__a > (__v32qs)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Thu Oct  1 18:40:12 2015
@@ -35,6 +35,10 @@ typedef int __v8si __attribute__ ((__vec
 typedef short __v16hi __attribute__ ((__vector_size__ (32)));
 typedef char __v32qi __attribute__ ((__vector_size__ (32)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v32qs __attribute__((__vector_size__(32)));
+
 typedef float __m256 __attribute__ ((__vector_size__ (32)));
 typedef double __m256d __attribute__((__vector_size__(32)));
 typedef long long __m256i __attribute__((__vector_size__(32)));

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Thu Oct  1 18:40:12 2015
@@ -35,6 +35,10 @@ typedef long long __v2di __attribute__ (
 typedef short __v8hi __attribute__((__vector_size__(16)));
 typedef char __v16qi __attribute__((__vector_size__(16)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v16qs __attribute__((__vector_size__(16)));
+
 #include <f16cintrin.h>
 
 /* Define the default attributes for the functions in this file. */
@@ -996,8 +1000,7 @@ static __inline__ __m128i __DEFAULT_FN_A
 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
 {
   /* This function always performs a signed comparison, but __v16qi is a char
-     which may be signed or unsigned. */
-  typedef signed char __v16qs __attribute__((__vector_size__(16)));
+     which may be signed or unsigned, so use __v16qs. */
   return (__m128i)((__v16qs)__a > (__v16qs)__b);
 }
 

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Thu Oct  1 18:40:12 2015
@@ -286,34 +286,26 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi16(__m128i __V)
 {
-  /* We need a local definitively signed typedef similar to __v16qi even in the
-   * presence of __UNSIGNED_CHAR__.
-   * FIXME: __v16qi should almost certainly be definitively signed.
-   */
-  typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
-  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi32(__m128i __V)
 {
-  /* We need a local definitively signed typedef similar to __v16qi even in the
-   * presence of __UNSIGNED_CHAR__.
-   * FIXME: __v16qi should almost certainly be definitively signed.
-   */
-  typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
-  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1, 2, 3), __v4si);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi64(__m128i __V)
 {
-  /* We need a local definitively signed typedef similar to __v16qi even in the
-   * presence of __UNSIGNED_CHAR__.
-   * FIXME: __v16qi should almost certainly be definitively signed.
-   */
-  typedef signed char __signed_v16qi __attribute__((__vector_size__(16)));
-  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__signed_v16qi)__V, (__signed_v16qi)__V, 0, 1), __v2di);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  typedef signed char __v16qs __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Oct  1 18:40:12 2015
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -S -o - -Werror | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -S -o - -Werror | FileCheck %s --check-prefix=CHECK-ASM
 
 // REQUIRES: x86-registered-target
 

Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Thu Oct  1 18:40:12 2015
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
 #include <immintrin.h>
 

Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Thu Oct  1 18:40:12 2015
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
 #include <immintrin.h>
 

Modified: cfe/trunk/test/CodeGen/mmx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/mmx-builtins.c?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/mmx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/mmx-builtins.c Thu Oct  1 18:40:12 2015
@@ -1,5 +1,6 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +ssse3 -S -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -S -o - | FileCheck %s
 
 // FIXME: Disable inclusion of mm_malloc.h, our current implementation is broken
 // on win32 since we don't generally know how to find errno.h.

Modified: cfe/trunk/test/CodeGen/sse42-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse42-builtins.c?rev=249097&r1=249096&r2=249097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse42-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse42-builtins.c Thu Oct  1 18:40:12 2015
@@ -1,12 +1,35 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 // RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +sse4.2 -S -o - -Werror | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +sse4.2 -fno-signed-char -S -o - -Werror | FileCheck %s --check-prefix=CHECK-ASM
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
 
 #include <x86intrin.h>
 
+__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi8
+  // CHECK: icmp sgt <16 x i8>
+  // CHECK-ASM: pcmpgtb %xmm{{.*}}, %xmm{{.*}}
+  return _mm_cmpgt_epi8(A, B);
+}
+
+__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi16
+  // CHECK: icmp sgt <8 x i16>
+  // CHECK-ASM: pcmpgtw %xmm{{.*}}, %xmm{{.*}}
+  return _mm_cmpgt_epi16(A, B);
+}
+
+__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi32
+  // CHECK: icmp sgt <4 x i32>
+  // CHECK-ASM: pcmpgtd %xmm{{.*}}, %xmm{{.*}}
+  return _mm_cmpgt_epi32(A, B);
+}
+
 __m128i test_mm_cmpgt_epi64(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_cmpgt_epi64
   // CHECK: icmp sgt <2 x i64>




More information about the cfe-commits mailing list