r374419 - [ARM] Fix arm_neon.h with -flax-vector-conversions=none, part 2.

Eli Friedman via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 10 11:45:34 PDT 2019


Author: efriedma
Date: Thu Oct 10 11:45:34 2019
New Revision: 374419

URL: http://llvm.org/viewvc/llvm-project?rev=374419&view=rev
Log:
[ARM] Fix arm_neon.h with -flax-vector-conversions=none, part 2.

Just running -fsyntax-only over arm_neon.h doesn't cover some intrinsics
that are defined using macros. Add more test coverage for that.
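
For context, intrinsics that take a constant immediate (the shift-by-constant
family, for example) are emitted into arm_neon.h as macros rather than inline
functions, so their bodies are only type-checked when something actually calls
them. A minimal sketch of the kind of call that adds coverage follows; the
function name here is hypothetical, and the real test functions are in the
files modified below:

  #include <arm_neon.h>

  /* Only an actual call expands the macro body, so only a call checks it
     under -flax-vector-conversions=none. */
  uint8x8_t cover_vshr_n_u8(uint8x8_t a) {
    return vshr_n_u8(a, 3);
  }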

arm-neon-header.c wasn't checking the full set of available NEON target
features; change the target architecture of the test to account for
that.

Fix the generator for arm_neon.h to generate casts in more cases where
they are necessary.
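
With -flax-vector-conversions=none, the generic __builtin_neon_* builtins
(which are typed in terms of signed integer vectors) no longer convert
implicitly to unsigned or polynomial result and argument types, so the
generated macro bodies need explicit casts. The emitted code is roughly of
the following shape; this is an illustrative sketch, not the literal
generator output:

  #define vshr_n_u8(__p0, __p1) __extension__ ({ \
    uint8x8_t __s0 = __p0; \
    uint8x8_t __ret; \
    /* explicit casts keep this valid under -flax-vector-conversions=none */ \
    __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 16); \
    __ret; })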

Fix VFMLAL_LOW etc. to express their signatures differently, so the
builtins have the expected type. Maybe the TableGen backend should
detect intrinsics that are defined the wrong way, and produce an error.
The rules here are sort of strange.
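
For reference, the net effect of the signature change is that these builtins
widen the way the ACLE prototypes do: the accumulator and result are
single-precision vectors while the multiplicands stay half-precision. A
sketch of the intended C prototypes for the non-quad and quad variants
generated from the "hQh" type spec:

  float32x2_t vfmlal_low_f16(float32x2_t r, float16x4_t a, float16x4_t b);
  float32x4_t vfmlalq_low_f16(float32x4_t r, float16x8_t a, float16x8_t b);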

Differential Revision: https://reviews.llvm.org/D68743


Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
    cfe/trunk/test/CodeGen/arm_neon_intrinsics.c
    cfe/trunk/test/Headers/arm-neon-header.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=374419&r1=374418&r2=374419&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Thu Oct 10 11:45:34 2019
@@ -1651,10 +1651,10 @@ let ArchGuard = "defined(__ARM_FEATURE_D
 
 // v8.2-A FP16 fused multiply-add long instructions.
 let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in {
-  def VFMLAL_LOW  : SInst<"vfmlal_low",  "ffHH", "hQh">;
-  def VFMLSL_LOW  : SInst<"vfmlsl_low",  "ffHH", "hQh">;
-  def VFMLAL_HIGH : SInst<"vfmlal_high", "ffHH", "hQh">;
-  def VFMLSL_HIGH : SInst<"vfmlsl_high", "ffHH", "hQh">;
+  def VFMLAL_LOW  : SInst<"vfmlal_low",  "nndd", "hQh">;
+  def VFMLSL_LOW  : SInst<"vfmlsl_low",  "nndd", "hQh">;
+  def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">;
+  def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">;
 
   def VFMLAL_LANE_LOW  : SOpInst<"vfmlal_lane_low",  "ffH0i", "hQh", OP_FMLAL_LN>;
   def VFMLSL_LANE_LOW  : SOpInst<"vfmlsl_lane_low",  "ffH0i", "hQh", OP_FMLSL_LN>;

Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=374419&r1=374418&r2=374419&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Thu Oct 10 11:45:34 2019
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN:     -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN:     -fallow-half-arguments-and-returns -S -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
 // RUN: | opt -S -mem2reg \
 // RUN: | FileCheck %s
 
@@ -406,7 +407,7 @@ int8x8_t test_vmla_s8(int8x8_t v1, int8x
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[TMP0]]
 int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
-  return vmla_s16(v1, v2, v3);
+  return (int8x8_t)vmla_s16(v1, v2, v3);
 }
 
 // CHECK-LABEL: @test_vmla_s32(
@@ -527,7 +528,7 @@ int8x8_t test_vmls_s8(int8x8_t v1, int8x
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[TMP0]]
 int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
-  return vmls_s16(v1, v2, v3);
+  return (int8x8_t)vmls_s16(v1, v2, v3);
 }
 
 // CHECK-LABEL: @test_vmls_s32(
@@ -978,7 +979,7 @@ int8x8_t test_vbsl_s8(uint8x8_t v1, int8
 // CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
 // CHECK:   ret <8 x i8> [[TMP4]]
 int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
-  return vbsl_s16(v1, v2, v3);
+  return (int8x8_t)vbsl_s16(v1, v2, v3);
 }
 
 // CHECK-LABEL: @test_vbsl_s32(
@@ -1003,7 +1004,7 @@ int32x2_t test_vbsl_s32(uint32x2_t v1, i
 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   ret <1 x i64> [[VBSL5_I]]
-uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
+int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
   return vbsl_s64(v1, v2, v3);
 }
 
@@ -1057,19 +1058,18 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1,
 }
 
 // CHECK-LABEL: @test_vbsl_f32(
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
 // CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
 // CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
 // CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]]
-// CHECK:   [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], <i32 -1, i32 -1>
+// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
+// CHECK:   [[TMP4:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
 // CHECK:   ret <2 x float> [[TMP5]]
-float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
   return vbsl_f32(v1, v2, v3);
 }
 
@@ -4661,7 +4661,7 @@ int64x2_t test_vshlq_n_s64(int64x2_t a)
 // CHECK-LABEL: @test_vshl_n_u8(
 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   ret <8 x i8> [[VSHL_N]]
-int8x8_t test_vshl_n_u8(int8x8_t a) {
+uint8x8_t test_vshl_n_u8(uint8x8_t a) {
   return vshl_n_u8(a, 3);
 }
 
@@ -4670,7 +4670,7 @@ int8x8_t test_vshl_n_u8(int8x8_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
 // CHECK:   ret <4 x i16> [[VSHL_N]]
-int16x4_t test_vshl_n_u16(int16x4_t a) {
+uint16x4_t test_vshl_n_u16(uint16x4_t a) {
   return vshl_n_u16(a, 3);
 }
 
@@ -4679,14 +4679,14 @@ int16x4_t test_vshl_n_u16(int16x4_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
 // CHECK:   ret <2 x i32> [[VSHL_N]]
-int32x2_t test_vshl_n_u32(int32x2_t a) {
+uint32x2_t test_vshl_n_u32(uint32x2_t a) {
   return vshl_n_u32(a, 3);
 }
 
 // CHECK-LABEL: @test_vshlq_n_u8(
 // CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   ret <16 x i8> [[VSHL_N]]
-int8x16_t test_vshlq_n_u8(int8x16_t a) {
+uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
   return vshlq_n_u8(a, 3);
 }
 
@@ -4695,7 +4695,7 @@ int8x16_t test_vshlq_n_u8(int8x16_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 // CHECK:   ret <8 x i16> [[VSHL_N]]
-int16x8_t test_vshlq_n_u16(int16x8_t a) {
+uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
   return vshlq_n_u16(a, 3);
 }
 
@@ -4704,7 +4704,7 @@ int16x8_t test_vshlq_n_u16(int16x8_t a)
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
 // CHECK:   ret <4 x i32> [[VSHL_N]]
-int32x4_t test_vshlq_n_u32(int32x4_t a) {
+uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
   return vshlq_n_u32(a, 3);
 }
 
@@ -4713,7 +4713,7 @@ int32x4_t test_vshlq_n_u32(int32x4_t a)
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
 // CHECK:   ret <2 x i64> [[VSHL_N]]
-int64x2_t test_vshlq_n_u64(int64x2_t a) {
+uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
   return vshlq_n_u64(a, 3);
 }
 
@@ -4779,7 +4779,7 @@ int64x2_t test_vshrq_n_s64(int64x2_t a)
 // CHECK-LABEL: @test_vshr_n_u8(
 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   ret <8 x i8> [[VSHR_N]]
-int8x8_t test_vshr_n_u8(int8x8_t a) {
+uint8x8_t test_vshr_n_u8(uint8x8_t a) {
   return vshr_n_u8(a, 3);
 }
 
@@ -4788,7 +4788,7 @@ int8x8_t test_vshr_n_u8(int8x8_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
 // CHECK:   ret <4 x i16> [[VSHR_N]]
-int16x4_t test_vshr_n_u16(int16x4_t a) {
+uint16x4_t test_vshr_n_u16(uint16x4_t a) {
   return vshr_n_u16(a, 3);
 }
 
@@ -4797,14 +4797,14 @@ int16x4_t test_vshr_n_u16(int16x4_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
 // CHECK:   ret <2 x i32> [[VSHR_N]]
-int32x2_t test_vshr_n_u32(int32x2_t a) {
+uint32x2_t test_vshr_n_u32(uint32x2_t a) {
   return vshr_n_u32(a, 3);
 }
 
 // CHECK-LABEL: @test_vshrq_n_u8(
 // CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   ret <16 x i8> [[VSHR_N]]
-int8x16_t test_vshrq_n_u8(int8x16_t a) {
+uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
   return vshrq_n_u8(a, 3);
 }
 
@@ -4813,7 +4813,7 @@ int8x16_t test_vshrq_n_u8(int8x16_t a) {
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 // CHECK:   ret <8 x i16> [[VSHR_N]]
-int16x8_t test_vshrq_n_u16(int16x8_t a) {
+uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
   return vshrq_n_u16(a, 3);
 }
 
@@ -4822,7 +4822,7 @@ int16x8_t test_vshrq_n_u16(int16x8_t a)
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
 // CHECK:   ret <4 x i32> [[VSHR_N]]
-int32x4_t test_vshrq_n_u32(int32x4_t a) {
+uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
   return vshrq_n_u32(a, 3);
 }
 
@@ -4831,7 +4831,7 @@ int32x4_t test_vshrq_n_u32(int32x4_t a)
 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
 // CHECK:   ret <2 x i64> [[VSHR_N]]
-int64x2_t test_vshrq_n_u64(int64x2_t a) {
+uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
   return vshrq_n_u64(a, 3);
 }
 
@@ -4915,7 +4915,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a,
 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
 // CHECK:   ret <8 x i8> [[TMP0]]
-int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
+uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
   return vsra_n_u8(a, b, 3);
 }
 
@@ -4927,7 +4927,7 @@ int8x8_t test_vsra_n_u8(int8x8_t a, int8
 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
 // CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
 // CHECK:   ret <4 x i16> [[TMP4]]
-int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
+uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
   return vsra_n_u16(a, b, 3);
 }
 
@@ -4939,7 +4939,7 @@ int16x4_t test_vsra_n_u16(int16x4_t a, i
 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
 // CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
 // CHECK:   ret <2 x i32> [[TMP4]]
-int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
+uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
   return vsra_n_u32(a, b, 3);
 }
 
@@ -4947,7 +4947,7 @@ int32x2_t test_vsra_n_u32(int32x2_t a, i
 // CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
 // CHECK:   ret <16 x i8> [[TMP0]]
-int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
+uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
   return vsraq_n_u8(a, b, 3);
 }
 
@@ -4959,7 +4959,7 @@ int8x16_t test_vsraq_n_u8(int8x16_t a, i
 // CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 // CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
 // CHECK:   ret <8 x i16> [[TMP4]]
-int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
+uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
   return vsraq_n_u16(a, b, 3);
 }
 
@@ -4971,7 +4971,7 @@ int16x8_t test_vsraq_n_u16(int16x8_t a,
 // CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
 // CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
 // CHECK:   ret <4 x i32> [[TMP4]]
-int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
+uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
   return vsraq_n_u32(a, b, 3);
 }
 
@@ -4983,7 +4983,7 @@ int32x4_t test_vsraq_n_u32(int32x4_t a,
 // CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
 // CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
 // CHECK:   ret <2 x i64> [[TMP4]]
-int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
+uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
   return vsraq_n_u64(a, b, 3);
 }
 
@@ -5049,7 +5049,7 @@ int64x2_t test_vrshrq_n_s64(int64x2_t a)
 // CHECK-LABEL: @test_vrshr_n_u8(
 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
 // CHECK:   ret <8 x i8> [[VRSHR_N]]
-int8x8_t test_vrshr_n_u8(int8x8_t a) {
+uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
   return vrshr_n_u8(a, 3);
 }
 
@@ -5058,7 +5058,7 @@ int8x8_t test_vrshr_n_u8(int8x8_t a) {
 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
 // CHECK:   ret <4 x i16> [[VRSHR_N1]]
-int16x4_t test_vrshr_n_u16(int16x4_t a) {
+uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
   return vrshr_n_u16(a, 3);
 }
 
@@ -5067,14 +5067,14 @@ int16x4_t test_vrshr_n_u16(int16x4_t a)
 // CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
 // CHECK:   ret <2 x i32> [[VRSHR_N1]]
-int32x2_t test_vrshr_n_u32(int32x2_t a) {
+uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
   return vrshr_n_u32(a, 3);
 }
 
 // CHECK-LABEL: @test_vrshrq_n_u8(
 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
 // CHECK:   ret <16 x i8> [[VRSHR_N]]
-int8x16_t test_vrshrq_n_u8(int8x16_t a) {
+uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
   return vrshrq_n_u8(a, 3);
 }
 
@@ -5083,7 +5083,7 @@ int8x16_t test_vrshrq_n_u8(int8x16_t a)
 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
 // CHECK:   ret <8 x i16> [[VRSHR_N1]]
-int16x8_t test_vrshrq_n_u16(int16x8_t a) {
+uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
   return vrshrq_n_u16(a, 3);
 }
 
@@ -5092,7 +5092,7 @@ int16x8_t test_vrshrq_n_u16(int16x8_t a)
 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
 // CHECK:   ret <4 x i32> [[VRSHR_N1]]
-int32x4_t test_vrshrq_n_u32(int32x4_t a) {
+uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
   return vrshrq_n_u32(a, 3);
 }
 
@@ -5101,7 +5101,7 @@ int32x4_t test_vrshrq_n_u32(int32x4_t a)
 // CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
 // CHECK:   ret <2 x i64> [[VRSHR_N1]]
-int64x2_t test_vrshrq_n_u64(int64x2_t a) {
+uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
   return vrshrq_n_u64(a, 3);
 }
 
@@ -5185,7 +5185,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a,
 // CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
 // CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
 // CHECK:   ret <8 x i8> [[TMP0]]
-int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
+uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
   return vrsra_n_u8(a, b, 3);
 }
 
@@ -5197,7 +5197,7 @@ int8x8_t test_vrsra_n_u8(int8x8_t a, int
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK:   ret <4 x i16> [[TMP3]]
-int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
+uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
   return vrsra_n_u16(a, b, 3);
 }
 
@@ -5209,7 +5209,7 @@ int16x4_t test_vrsra_n_u16(int16x4_t a,
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK:   ret <2 x i32> [[TMP3]]
-int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
+uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
   return vrsra_n_u32(a, b, 3);
 }
 
@@ -5217,7 +5217,7 @@ int32x2_t test_vrsra_n_u32(int32x2_t a,
 // CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
 // CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
 // CHECK:   ret <16 x i8> [[TMP0]]
-int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
+uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
   return vrsraq_n_u8(a, b, 3);
 }
 
@@ -5229,7 +5229,7 @@ int8x16_t test_vrsraq_n_u8(int8x16_t a,
 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK:   ret <8 x i16> [[TMP3]]
-int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
+uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
   return vrsraq_n_u16(a, b, 3);
 }
 
@@ -5241,7 +5241,7 @@ int16x8_t test_vrsraq_n_u16(int16x8_t a,
 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK:   ret <4 x i32> [[TMP3]]
-int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
+uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
   return vrsraq_n_u32(a, b, 3);
 }
 
@@ -5253,7 +5253,7 @@ int32x4_t test_vrsraq_n_u32(int32x4_t a,
 // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
 // CHECK:   ret <2 x i64> [[TMP3]]
-int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
+uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
   return vrsraq_n_u64(a, b, 3);
 }
 
@@ -5329,7 +5329,7 @@ int64x2_t test_vsriq_n_s64(int64x2_t a,
 // CHECK-LABEL: @test_vsri_n_u8(
 // CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
 // CHECK:   ret <8 x i8> [[VSRI_N]]
-int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
+uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
   return vsri_n_u8(a, b, 3);
 }
 
@@ -5340,7 +5340,7 @@ int8x8_t test_vsri_n_u8(int8x8_t a, int8
 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
 // CHECK:   ret <4 x i16> [[VSRI_N2]]
-int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
+uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
   return vsri_n_u16(a, b, 3);
 }
 
@@ -5351,14 +5351,14 @@ int16x4_t test_vsri_n_u16(int16x4_t a, i
 // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
 // CHECK:   ret <2 x i32> [[VSRI_N2]]
-int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
+uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
   return vsri_n_u32(a, b, 3);
 }
 
 // CHECK-LABEL: @test_vsriq_n_u8(
 // CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
 // CHECK:   ret <16 x i8> [[VSRI_N]]
-int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
+uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
   return vsriq_n_u8(a, b, 3);
 }
 
@@ -5369,7 +5369,7 @@ int8x16_t test_vsriq_n_u8(int8x16_t a, i
 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
 // CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
 // CHECK:   ret <8 x i16> [[VSRI_N2]]
-int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
+uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
   return vsriq_n_u16(a, b, 3);
 }
 
@@ -5380,7 +5380,7 @@ int16x8_t test_vsriq_n_u16(int16x8_t a,
 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
 // CHECK:   ret <4 x i32> [[VSRI_N2]]
-int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
+uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
   return vsriq_n_u32(a, b, 3);
 }
 
@@ -5391,7 +5391,7 @@ int32x4_t test_vsriq_n_u32(int32x4_t a,
 // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
 // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
 // CHECK:   ret <2 x i64> [[VSRI_N2]]
-int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
+uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
   return vsriq_n_u64(a, b, 3);
 }
 
@@ -5608,7 +5608,7 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a
 // CHECK-LABEL: @test_vqshlu_n_s8(
 // CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 // CHECK:   ret <8 x i8> [[VQSHLU_N]]
-int8x8_t test_vqshlu_n_s8(int8x8_t a) {
+uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
   return vqshlu_n_s8(a, 3);
 }
 
@@ -5617,7 +5617,7 @@ int8x8_t test_vqshlu_n_s8(int8x8_t a) {
 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
 // CHECK:   ret <4 x i16> [[VQSHLU_N1]]
-int16x4_t test_vqshlu_n_s16(int16x4_t a) {
+uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
   return vqshlu_n_s16(a, 3);
 }
 
@@ -5626,14 +5626,14 @@ int16x4_t test_vqshlu_n_s16(int16x4_t a)
 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
 // CHECK:   ret <2 x i32> [[VQSHLU_N1]]
-int32x2_t test_vqshlu_n_s32(int32x2_t a) {
+uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
   return vqshlu_n_s32(a, 3);
 }
 
 // CHECK-LABEL: @test_vqshluq_n_s8(
 // CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 // CHECK:   ret <16 x i8> [[VQSHLU_N]]
-int8x16_t test_vqshluq_n_s8(int8x16_t a) {
+uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
   return vqshluq_n_s8(a, 3);
 }
 
@@ -5642,7 +5642,7 @@ int8x16_t test_vqshluq_n_s8(int8x16_t a)
 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 // CHECK:   ret <8 x i16> [[VQSHLU_N1]]
-int16x8_t test_vqshluq_n_s16(int16x8_t a) {
+uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
   return vqshluq_n_s16(a, 3);
 }
 
@@ -5651,7 +5651,7 @@ int16x8_t test_vqshluq_n_s16(int16x8_t a
 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
 // CHECK:   ret <4 x i32> [[VQSHLU_N1]]
-int32x4_t test_vqshluq_n_s32(int32x4_t a) {
+uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
   return vqshluq_n_s32(a, 3);
 }
 
@@ -5660,7 +5660,7 @@ int32x4_t test_vqshluq_n_s32(int32x4_t a
 // CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
 // CHECK:   ret <2 x i64> [[VQSHLU_N1]]
-int64x2_t test_vqshluq_n_s64(int64x2_t a) {
+uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
   return vqshluq_n_s64(a, 3);
 }
 
@@ -5795,7 +5795,7 @@ uint32x4_t test_vshrn_high_n_u64(uint32x
 // CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
 // CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
-int8x8_t test_vqshrun_n_s16(int16x8_t a) {
+uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
   return vqshrun_n_s16(a, 3);
 }
 
@@ -5804,7 +5804,7 @@ int8x8_t test_vqshrun_n_s16(int16x8_t a)
 // CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
 // CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
-int16x4_t test_vqshrun_n_s32(int32x4_t a) {
+uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
   return vqshrun_n_s32(a, 9);
 }
 
@@ -5813,7 +5813,7 @@ int16x4_t test_vqshrun_n_s32(int32x4_t a
 // CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
 // CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
-int32x2_t test_vqshrun_n_s64(int64x2_t a) {
+uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
   return vqshrun_n_s64(a, 19);
 }
 
@@ -5966,7 +5966,7 @@ uint32x4_t test_vrshrn_high_n_u64(uint32
 // CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
 // CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
-int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
+uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
   return vqrshrun_n_s16(a, 3);
 }
 
@@ -5975,7 +5975,7 @@ int8x8_t test_vqrshrun_n_s16(int16x8_t a
 // CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
 // CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
-int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
+uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
   return vqrshrun_n_s32(a, 9);
 }
 
@@ -5984,7 +5984,7 @@ int16x4_t test_vqrshrun_n_s32(int32x4_t
 // CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
 // CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
-int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
+uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
   return vqrshrun_n_s64(a, 19);
 }
 

Modified: cfe/trunk/test/CodeGen/arm_neon_intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm_neon_intrinsics.c?rev=374419&r1=374418&r2=374419&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm_neon_intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/arm_neon_intrinsics.c Thu Oct 10 11:45:34 2019
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
 // RUN:  -target-cpu swift -fallow-half-arguments-and-returns \
 // RUN:  -target-feature +fullfp16 -ffreestanding \
+// RUN:  -flax-vector-conversions=none \
 // RUN:  -disable-O0-optnone -emit-llvm -o - %s \
 // RUN:  | opt -S -mem2reg | FileCheck %s
 
@@ -2184,8 +2185,8 @@ float32x2_t test_vcreate_f32(uint64_t a)
 // CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
 // CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
 // CHECK:   ret <8 x i8> [[VCLZ_V_I]]
-uint8x8_t test_vcreate_u8(uint64_t a) {
-  return vclz_s8(vcreate_u8(a));
+int8x8_t test_vcreate_u8(uint64_t a) {
+  return vclz_s8((int8x8_t)vcreate_u8(a));
 }
 
 // CHECK-LABEL: @test_vcreate_u16(
@@ -2194,8 +2195,8 @@ uint8x8_t test_vcreate_u8(uint64_t a) {
 // CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
 // CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
-uint16x4_t test_vcreate_u16(uint64_t a) {
-  return vclz_s16(vcreate_u16(a));
+int16x4_t test_vcreate_u16(uint64_t a) {
+  return vclz_s16((int16x4_t)vcreate_u16(a));
 }
 
 // CHECK-LABEL: @test_vcreate_u32(
@@ -2204,8 +2205,8 @@ uint16x4_t test_vcreate_u16(uint64_t a)
 // CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
 // CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
 // CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
-uint32x2_t test_vcreate_u32(uint64_t a) {
-  return vclz_s32(vcreate_u32(a));
+int32x2_t test_vcreate_u32(uint64_t a) {
+  return vclz_s32((int32x2_t)vcreate_u32(a));
 }
 
 // CHECK-LABEL: @test_vcreate_u64(
@@ -2235,7 +2236,7 @@ poly8x8_t test_vcreate_p8(uint64_t a) {
 // CHECK:   ret <4 x i16> [[TMP4]]
 poly16x4_t test_vcreate_p16(uint64_t a) {
   poly16x4_t tmp = vcreate_p16(a);
-  return vbsl_p16(tmp, tmp, tmp);
+  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
 }
 
 // CHECK-LABEL: @test_vcreate_s64(
@@ -2830,8 +2831,8 @@ int64x1_t test_vdup_n_s64(int64_t a) {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
 // CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
 // CHECK:   ret <1 x i64> [[ADD_I]]
-uint64x1_t test_vdup_n_u64(uint64_t a) {
-  int64x1_t tmp = vdup_n_u64(a);
+int64x1_t test_vdup_n_u64(uint64_t a) {
+  int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
   return vadd_s64(tmp, tmp);
 }
 
@@ -2851,7 +2852,7 @@ int64x2_t test_vdupq_n_s64(int64_t a) {
 // CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
 // CHECK:   ret <2 x i64> [[ADD_I]]
 uint64x2_t test_vdupq_n_u64(uint64_t a) {
-  int64x2_t tmp = vdupq_n_u64(a);
+  uint64x2_t tmp = vdupq_n_u64(a);
   return vaddq_u64(tmp, tmp);
 }
 

Modified: cfe/trunk/test/Headers/arm-neon-header.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Headers/arm-neon-header.c?rev=374419&r1=374418&r2=374419&view=diff
==============================================================================
--- cfe/trunk/test/Headers/arm-neon-header.c (original)
+++ cfe/trunk/test/Headers/arm-neon-header.c Thu Oct 10 11:45:34 2019
@@ -20,7 +20,7 @@
 // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s
 // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s
 
-// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s
-// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s
 
 #include <arm_neon.h>

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=374419&r1=374418&r2=374419&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Thu Oct 10 11:45:34 2019
@@ -1413,7 +1413,7 @@ void Intrinsic::emitBodyAsBuiltinCall()
     if (T.getNumVectors() > 1) {
       // Check if an explicit cast is needed.
       std::string Cast;
-      if (T.isChar() || T.isPoly() || !T.isSigned()) {
+      if (LocalCK == ClassB) {
         Type T2 = T;
         T2.makeOneVector();
         T2.makeInteger(8, /*Signed=*/true);
@@ -1445,6 +1445,9 @@ void Intrinsic::emitBodyAsBuiltinCall()
     if (CastToType.isVector() && LocalCK == ClassB) {
       CastToType.makeInteger(8, true);
       Arg = "(" + CastToType.str() + ")" + Arg;
+    } else if (CastToType.isVector() && LocalCK == ClassI) {
+      CastToType.makeSigned();
+      Arg = "(" + CastToType.str() + ")" + Arg;
     }
 
     S += Arg + ", ";



