r374191 - [ARM] Fix arm_neon.h with -flax-vector-conversions=none

Richard Smith via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 9 12:42:01 PDT 2019


Hi Eli,

Looks like this isn't enough to get <arm_neon.h> to work properly with
-flax-vector-conversions=integer across all subtargets. With this patch
applied, the following tests fail with -flax-vector-conversions=integer:

    Clang :: CodeGen/aarch64-neon-fp16fml.c
    Clang :: CodeGen/aarch64-neon-intrinsics.c
    Clang :: CodeGen/aarch64-neon-ldst-one.c
    Clang :: CodeGen/aarch64-v8.2a-neon-intrinsics.c
    Clang :: CodeGen/arm-neon-vst.c
    Clang :: CodeGen/arm64-vrnd.c
    Clang :: CodeGen/arm_neon_intrinsics.c

For example, with -target-feature +v8.2a -target-feature +neon
-target-feature +fp16fml we get:

.../build/lib/clang/10.0.0/include/arm_neon.h:37147:55: error: passing
'int8x16_t' (vector of 16 'int8_t' values) to parameter of incompatible
type '__attribute__((__vector_size__(4 * sizeof(float)))) float' (vector of
4 'float' values)
  __ret = (float32x4_t) __builtin_neon_vfmlalq_high_v((int8x16_t)__p0,
(int8x16_t)__p1, (int8x16_t)__p2, 41);
                                                      ^~~~~~~~~~~~~~~


On Wed, 9 Oct 2019 at 10:55, Eli Friedman via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: efriedma
> Date: Wed Oct  9 10:57:59 2019
> New Revision: 374191
>
> URL: http://llvm.org/viewvc/llvm-project?rev=374191&view=rev
> Log:
> [ARM] Fix arm_neon.h with -flax-vector-conversions=none
>
> Really, we were already 99% of the way there; just needed a couple minor
> fixes that affected 64-bit-only builtins.  Based on D61717.
>
> Note that the change to builtin_str changes the type of a few
> __builtin_neon_* intrinsics that had the "wrong" type.
>
> Fixes https://bugs.llvm.org/show_bug.cgi?id=43341
>
> Differential Revision: https://reviews.llvm.org/D68683
>
>
> Modified:
>     cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
>     cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
>     cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c
>     cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
>     cfe/trunk/test/CodeGen/aarch64-neon-vget.c
>     cfe/trunk/test/CodeGen/aarch64-poly64.c
>     cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
>     cfe/trunk/test/CodeGen/arm64-lanes.c
>     cfe/trunk/test/CodeGen/arm64_vcopy.c
>     cfe/trunk/test/CodeGen/arm_neon_intrinsics.c
>     cfe/trunk/test/Headers/arm-neon-header.c
>     cfe/trunk/utils/TableGen/NeonEmitter.cpp
>
> Modified: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-2velem.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Wed Oct  9 10:57:59 2019
> @@ -451,9 +451,7 @@ float64x2_t test_vfmsq_laneq_f64(float64
>  }
>
>  // CHECK-LABEL: @test_vfmas_laneq_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3
>  // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float
> [[EXTRACT]], float %a)
>  // CHECK:   ret float [[TMP2]]
>  float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
> @@ -462,9 +460,7 @@ float32_t test_vfmas_laneq_f32(float32_t
>
>  // CHECK-LABEL: @test_vfmsd_lane_f64(
>  // CHECK:   [[SUB:%.*]] = fsub double -0.000000e+00, %b
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> %v, i32 0
>  // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]],
> double [[EXTRACT]], double %a)
>  // CHECK:   ret double [[TMP2]]
>  float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) {
> @@ -473,9 +469,7 @@ float64_t test_vfmsd_lane_f64(float64_t
>
>  // CHECK-LABEL: @test_vfmss_laneq_f32(
>  // CHECK:   [[SUB:%.*]] = fsub float -0.000000e+00, %b
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3
>  // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float
> [[EXTRACT]], float %a)
>  // CHECK:   ret float [[TMP2]]
>  float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
> @@ -484,9 +478,7 @@ float32_t test_vfmss_laneq_f32(float32_t
>
>  // CHECK-LABEL: @test_vfmsd_laneq_f64(
>  // CHECK:   [[SUB:%.*]] = fsub double -0.000000e+00, %b
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> %v, i32 1
>  // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]],
> double [[EXTRACT]], double %a)
>  // CHECK:   ret double [[TMP2]]
>  float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
>
> Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Wed Oct  9 10:57:59
> 2019
> @@ -8703,7 +8703,6 @@ uint64_t test_vqrshld_u64(uint64_t a, ui
>  }
>
>  // CHECK-LABEL: @test_vpaddd_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
>  // CHECK:   [[VPADDD_S64_I:%.*]] = call i64
> @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
>  // CHECK:   ret i64 [[VPADDD_S64_I]]
>  int64_t test_vpaddd_s64(int64x2_t a) {
> @@ -8711,7 +8710,6 @@ int64_t test_vpaddd_s64(int64x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vpadds_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
>  // CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
>  // CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
> @@ -8721,7 +8719,6 @@ float32_t test_vpadds_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vpaddd_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
>  // CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
>  // CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
> @@ -8731,7 +8728,6 @@ float64_t test_vpaddd_f64(float64x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vpmaxnms_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VPMAXNMS_F32_I]]
>  float32_t test_vpmaxnms_f32(float32x2_t a) {
> @@ -8739,7 +8735,6 @@ float32_t test_vpmaxnms_f32(float32x2_t
>  }
>
>  // CHECK-LABEL: @test_vpmaxnmqd_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VPMAXNMQD_F64_I]]
>  float64_t test_vpmaxnmqd_f64(float64x2_t a) {
> @@ -8747,7 +8742,6 @@ float64_t test_vpmaxnmqd_f64(float64x2_t
>  }
>
>  // CHECK-LABEL: @test_vpmaxs_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VPMAXS_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VPMAXS_F32_I]]
>  float32_t test_vpmaxs_f32(float32x2_t a) {
> @@ -8755,7 +8749,6 @@ float32_t test_vpmaxs_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vpmaxqd_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VPMAXQD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VPMAXQD_F64_I]]
>  float64_t test_vpmaxqd_f64(float64x2_t a) {
> @@ -8763,7 +8756,6 @@ float64_t test_vpmaxqd_f64(float64x2_t a
>  }
>
>  // CHECK-LABEL: @test_vpminnms_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VPMINNMS_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VPMINNMS_F32_I]]
>  float32_t test_vpminnms_f32(float32x2_t a) {
> @@ -8771,7 +8763,6 @@ float32_t test_vpminnms_f32(float32x2_t
>  }
>
>  // CHECK-LABEL: @test_vpminnmqd_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VPMINNMQD_F64_I]]
>  float64_t test_vpminnmqd_f64(float64x2_t a) {
> @@ -8779,7 +8770,6 @@ float64_t test_vpminnmqd_f64(float64x2_t
>  }
>
>  // CHECK-LABEL: @test_vpmins_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VPMINS_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VPMINS_F32_I]]
>  float32_t test_vpmins_f32(float32x2_t a) {
> @@ -8787,7 +8777,6 @@ float32_t test_vpmins_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vpminqd_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VPMINQD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VPMINQD_F64_I]]
>  float64_t test_vpminqd_f64(float64x2_t a) {
> @@ -17690,7 +17679,6 @@ int64x1_t test_vneg_s64(int64x1_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddv_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VADDV_F32_I:%.*]] = call float
> @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VADDV_F32_I]]
>  float32_t test_vaddv_f32(float32x2_t a) {
> @@ -17698,7 +17686,6 @@ float32_t test_vaddv_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vaddvq_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
>  // CHECK:   [[VADDVQ_F32_I:%.*]] = call float
> @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
>  // CHECK:   ret float [[VADDVQ_F32_I]]
>  float32_t test_vaddvq_f32(float32x4_t a) {
> @@ -17706,7 +17693,6 @@ float32_t test_vaddvq_f32(float32x4_t a)
>  }
>
>  // CHECK-LABEL: @test_vaddvq_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VADDVQ_F64_I:%.*]] = call double
> @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VADDVQ_F64_I]]
>  float64_t test_vaddvq_f64(float64x2_t a) {
> @@ -17714,7 +17700,6 @@ float64_t test_vaddvq_f64(float64x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vmaxv_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VMAXV_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VMAXV_F32_I]]
>  float32_t test_vmaxv_f32(float32x2_t a) {
> @@ -17722,7 +17707,6 @@ float32_t test_vmaxv_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vmaxvq_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VMAXVQ_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VMAXVQ_F64_I]]
>  float64_t test_vmaxvq_f64(float64x2_t a) {
> @@ -17730,7 +17714,6 @@ float64_t test_vmaxvq_f64(float64x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vminv_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VMINV_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VMINV_F32_I]]
>  float32_t test_vminv_f32(float32x2_t a) {
> @@ -17738,7 +17721,6 @@ float32_t test_vminv_f32(float32x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vminvq_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VMINVQ_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VMINVQ_F64_I]]
>  float64_t test_vminvq_f64(float64x2_t a) {
> @@ -17746,7 +17728,6 @@ float64_t test_vminvq_f64(float64x2_t a)
>  }
>
>  // CHECK-LABEL: @test_vmaxnmvq_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VMAXNMVQ_F64_I]]
>  float64_t test_vmaxnmvq_f64(float64x2_t a) {
> @@ -17754,7 +17735,6 @@ float64_t test_vmaxnmvq_f64(float64x2_t
>  }
>
>  // CHECK-LABEL: @test_vmaxnmv_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VMAXNMV_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VMAXNMV_F32_I]]
>  float32_t test_vmaxnmv_f32(float32x2_t a) {
> @@ -17762,7 +17742,6 @@ float32_t test_vmaxnmv_f32(float32x2_t a
>  }
>
>  // CHECK-LABEL: @test_vminnmvq_f64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
>  // CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
>  // CHECK:   ret double [[VMINNMVQ_F64_I]]
>  float64_t test_vminnmvq_f64(float64x2_t a) {
> @@ -17770,7 +17749,6 @@ float64_t test_vminnmvq_f64(float64x2_t
>  }
>
>  // CHECK-LABEL: @test_vminnmv_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
>  // CHECK:   [[VMINNMV_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
>  // CHECK:   ret float [[VMINNMV_F32_I]]
>  float32_t test_vminnmv_f32(float32x2_t a) {
> @@ -17798,7 +17776,6 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a,
>  }
>
>  // CHECK-LABEL: @test_vpaddd_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
>  // CHECK:   [[VPADDD_U64_I:%.*]] = call i64
> @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
>  // CHECK:   ret i64 [[VPADDD_U64_I]]
>  uint64_t test_vpaddd_u64(uint64x2_t a) {
> @@ -17806,7 +17783,6 @@ uint64_t test_vpaddd_u64(uint64x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddvq_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
>  // CHECK:   [[VADDVQ_S64_I:%.*]] = call i64
> @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
>  // CHECK:   ret i64 [[VADDVQ_S64_I]]
>  int64_t test_vaddvq_s64(int64x2_t a) {
> @@ -17814,7 +17790,6 @@ int64_t test_vaddvq_s64(int64x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddvq_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
>  // CHECK:   [[VADDVQ_U64_I:%.*]] = call i64
> @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
>  // CHECK:   ret i64 [[VADDVQ_U64_I]]
>  uint64_t test_vaddvq_u64(uint64x2_t a) {
> @@ -18178,7 +18153,6 @@ float64x1_t test_vrsqrts_f64(float64x1_t
>  }
>
>  // CHECK-LABEL: @test_vminv_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VMINV_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VMINV_S32_I]]
>  int32_t test_vminv_s32(int32x2_t a) {
> @@ -18186,7 +18160,6 @@ int32_t test_vminv_s32(int32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vminv_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VMINV_U32_I:%.*]] = call i32
> @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VMINV_U32_I]]
>  uint32_t test_vminv_u32(uint32x2_t a) {
> @@ -18194,7 +18167,6 @@ uint32_t test_vminv_u32(uint32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vmaxv_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VMAXV_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VMAXV_S32_I]]
>  int32_t test_vmaxv_s32(int32x2_t a) {
> @@ -18202,7 +18174,6 @@ int32_t test_vmaxv_s32(int32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vmaxv_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VMAXV_U32_I:%.*]] = call i32
> @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VMAXV_U32_I]]
>  uint32_t test_vmaxv_u32(uint32x2_t a) {
> @@ -18210,7 +18181,6 @@ uint32_t test_vmaxv_u32(uint32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddv_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VADDV_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VADDV_S32_I]]
>  int32_t test_vaddv_s32(int32x2_t a) {
> @@ -18218,7 +18188,6 @@ int32_t test_vaddv_s32(int32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddv_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VADDV_U32_I:%.*]] = call i32
> @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
>  // CHECK:   ret i32 [[VADDV_U32_I]]
>  uint32_t test_vaddv_u32(uint32x2_t a) {
> @@ -18226,7 +18195,6 @@ uint32_t test_vaddv_u32(uint32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddlv_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VADDLV_S32_I:%.*]] = call i64
> @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
>  // CHECK:   ret i64 [[VADDLV_S32_I]]
>  int64_t test_vaddlv_s32(int32x2_t a) {
> @@ -18234,7 +18202,6 @@ int64_t test_vaddlv_s32(int32x2_t a) {
>  }
>
>  // CHECK-LABEL: @test_vaddlv_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
>  // CHECK:   [[VADDLV_U32_I:%.*]] = call i64
> @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
>  // CHECK:   ret i64 [[VADDLV_U32_I]]
>  uint64_t test_vaddlv_u32(uint32x2_t a) {
>
> Modified: cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c Wed Oct  9 10:57:59
> 2019
> @@ -4,9 +4,7 @@
>  #include <arm_neon.h>
>
>  // CHECK-LABEL: define float @test_vdups_lane_f32(<2 x float> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VDUPS_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32
> 1
> +// CHECK:   [[VDUPS_LANE:%.*]] = extractelement <2 x float> %a, i32 1
>  // CHECK:   ret float [[VDUPS_LANE]]
>  float32_t test_vdups_lane_f32(float32x2_t a) {
>    return vdups_lane_f32(a, 1);
> @@ -14,9 +12,7 @@ float32_t test_vdups_lane_f32(float32x2_
>
>
>  // CHECK-LABEL: define double @test_vdupd_lane_f64(<1 x double> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VDUPD_LANE:%.*]] = extractelement <1 x double> [[TMP1]],
> i32 0
> +// CHECK:   [[VDUPD_LANE:%.*]] = extractelement <1 x double> %a, i32 0
>  // CHECK:   ret double [[VDUPD_LANE]]
>  float64_t test_vdupd_lane_f64(float64x1_t a) {
>    return vdupd_lane_f64(a, 0);
> @@ -24,9 +20,7 @@ float64_t test_vdupd_lane_f64(float64x1_
>
>
>  // CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32
> 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
>  // CHECK:   ret float [[VGETQ_LANE]]
>  float32_t test_vdups_laneq_f32(float32x4_t a) {
>    return vdups_laneq_f32(a, 3);
> @@ -34,9 +28,7 @@ float32_t test_vdups_laneq_f32(float32x4
>
>
>  // CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]],
> i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a, i32 1
>  // CHECK:   ret double [[VGETQ_LANE]]
>  float64_t test_vdupd_laneq_f64(float64x2_t a) {
>    return vdupd_laneq_f64(a, 1);
> @@ -52,9 +44,7 @@ int8_t test_vdupb_lane_s8(int8x8_t a) {
>
>
>  // CHECK-LABEL: define i16 @test_vduph_lane_s16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  int16_t test_vduph_lane_s16(int16x4_t a) {
>    return vduph_lane_s16(a, 3);
> @@ -62,9 +52,7 @@ int16_t test_vduph_lane_s16(int16x4_t a)
>
>
>  // CHECK-LABEL: define i32 @test_vdups_lane_s32(<2 x i32> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  int32_t test_vdups_lane_s32(int32x2_t a) {
>    return vdups_lane_s32(a, 1);
> @@ -72,9 +60,7 @@ int32_t test_vdups_lane_s32(int32x2_t a)
>
>
>  // CHECK-LABEL: define i64 @test_vdupd_lane_s64(<1 x i64> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  int64_t test_vdupd_lane_s64(int64x1_t a) {
>    return vdupd_lane_s64(a, 0);
> @@ -90,9 +76,7 @@ uint8_t test_vdupb_lane_u8(uint8x8_t a)
>
>
>  // CHECK-LABEL: define i16 @test_vduph_lane_u16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  uint16_t test_vduph_lane_u16(uint16x4_t a) {
>    return vduph_lane_u16(a, 3);
> @@ -100,9 +84,7 @@ uint16_t test_vduph_lane_u16(uint16x4_t
>
>
>  // CHECK-LABEL: define i32 @test_vdups_lane_u32(<2 x i32> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  uint32_t test_vdups_lane_u32(uint32x2_t a) {
>    return vdups_lane_u32(a, 1);
> @@ -110,9 +92,7 @@ uint32_t test_vdups_lane_u32(uint32x2_t
>
>
>  // CHECK-LABEL: define i64 @test_vdupd_lane_u64(<1 x i64> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  uint64_t test_vdupd_lane_u64(uint64x1_t a) {
>    return vdupd_lane_u64(a, 0);
> @@ -127,9 +107,7 @@ int8_t test_vdupb_laneq_s8(int8x16_t a)
>
>
>  // CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  int16_t test_vduph_laneq_s16(int16x8_t a) {
>    return vduph_laneq_s16(a, 7);
> @@ -137,9 +115,7 @@ int16_t test_vduph_laneq_s16(int16x8_t a
>
>
>  // CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGETQ_LANE]]
>  int32_t test_vdups_laneq_s32(int32x4_t a) {
>    return vdups_laneq_s32(a, 3);
> @@ -147,9 +123,7 @@ int32_t test_vdups_laneq_s32(int32x4_t a
>
>
>  // CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGETQ_LANE]]
>  int64_t test_vdupd_laneq_s64(int64x2_t a) {
>    return vdupd_laneq_s64(a, 1);
> @@ -165,9 +139,7 @@ uint8_t test_vdupb_laneq_u8(uint8x16_t a
>
>
>  // CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  uint16_t test_vduph_laneq_u16(uint16x8_t a) {
>    return vduph_laneq_u16(a, 7);
> @@ -175,9 +147,7 @@ uint16_t test_vduph_laneq_u16(uint16x8_t
>
>
>  // CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGETQ_LANE]]
>  uint32_t test_vdups_laneq_u32(uint32x4_t a) {
>    return vdups_laneq_u32(a, 3);
> @@ -185,9 +155,7 @@ uint32_t test_vdups_laneq_u32(uint32x4_t
>
>
>  // CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGETQ_LANE]]
>  uint64_t test_vdupd_laneq_u64(uint64x2_t a) {
>    return vdupd_laneq_u64(a, 1);
> @@ -201,9 +169,7 @@ poly8_t test_vdupb_lane_p8(poly8x8_t a)
>  }
>
>  // CHECK-LABEL: define i16 @test_vduph_lane_p16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  poly16_t test_vduph_lane_p16(poly16x4_t a) {
>    return vduph_lane_p16(a, 3);
> @@ -217,9 +183,7 @@ poly8_t test_vdupb_laneq_p8(poly8x16_t a
>  }
>
>  // CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  poly16_t test_vduph_laneq_p16(poly16x8_t a) {
>    return vduph_laneq_p16(a, 7);
>
> Modified: cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c Wed Oct  9
> 10:57:59 2019
> @@ -7,9 +7,7 @@
>
>
>  // CHECK-LABEL: define float @test_vmuls_lane_f32(float %a, <2 x float>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
>  // CHECK:   [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
>  // CHECK:   ret float [[MUL]]
>  float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
> @@ -17,9 +15,7 @@ float32_t test_vmuls_lane_f32(float32_t
>  }
>
>  // CHECK-LABEL: define double @test_vmuld_lane_f64(double %a, <1 x
> double> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32
> 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
>  // CHECK:   [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
>  // CHECK:   ret double [[MUL]]
>  float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
> @@ -27,9 +23,7 @@ float64_t test_vmuld_lane_f64(float64_t
>  }
>
>  // CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32
> 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
>  // CHECK:   [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
>  // CHECK:   ret float [[MUL]]
>  float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
> @@ -37,9 +31,7 @@ float32_t test_vmuls_laneq_f32(float32_t
>  }
>
>  // CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x
> double> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]],
> i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
>  // CHECK:   [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
>  // CHECK:   ret double [[MUL]]
>  float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
> @@ -56,9 +48,7 @@ float64x1_t test_vmul_n_f64(float64x1_t
>  }
>
>  // CHECK-LABEL: define float @test_vmulxs_lane_f32(float %a, <2 x float>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
>  // CHECK:   [[VMULXS_F32_I:%.*]] = call float
> @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
>  // CHECK:   ret float [[VMULXS_F32_I]]
>  float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
> @@ -66,9 +56,7 @@ float32_t test_vmulxs_lane_f32(float32_t
>  }
>
>  // CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
>  // CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
>  // CHECK:   ret float [[VMULXS_F32_I]]
>  float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
> @@ -76,9 +64,7 @@ float32_t test_vmulxs_laneq_f32(float32_
>  }
>
>  // CHECK-LABEL: define double @test_vmulxd_lane_f64(double %a, <1 x
> double> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32
> 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
>  // CHECK:   ret double [[VMULXD_F64_I]]
>  float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
> @@ -86,9 +72,7 @@ float64_t test_vmulxd_lane_f64(float64_t
>  }
>
>  // CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x
> double> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]],
> i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
>  // CHECK:   ret double [[VMULXD_F64_I]]
>  float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
> @@ -96,16 +80,10 @@ float64_t test_vmulxd_laneq_f64(float64_
>  }
>
>  // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
> -// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
> -// CHECK:   [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
> +// CHECK:   [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
> -// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
>  // CHECK:   ret <1 x double> [[VSET_LANE]]
>  float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
>    return vmulx_lane_f64(a, b, 0);
> @@ -113,32 +91,20 @@ float64x1_t test_vmulx_lane_f64(float64x
>
>
>  // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double>
> %a, <2 x double> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32
> 0
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]],
> i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
> -// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]],
> double [[VMULXD_F64_I]], i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double
> [[VMULXD_F64_I]], i32 0
>  // CHECK:   ret <1 x double> [[VSET_LANE]]
>  float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
>    return vmulx_laneq_f64(a, b, 0);
>  }
>
>  // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double>
> %a, <2 x double> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32
> 0
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]],
> i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double
> @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
> -// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]],
> double [[VMULXD_F64_I]], i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double
> [[VMULXD_F64_I]], i32 0
>  // CHECK:   ret <1 x double> [[VSET_LANE]]
>  float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
>    return vmulx_laneq_f64(a, b, 1);
> @@ -146,9 +112,7 @@ float64x1_t test_vmulx_laneq_f64_1(float
>
>
>  // CHECK-LABEL: define float @test_vfmas_lane_f32(float %a, float %b, <2
> x float> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
>  // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float
> [[EXTRACT]], float %a)
>  // CHECK:   ret float [[TMP2]]
>  float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
> @@ -156,9 +120,7 @@ float32_t test_vfmas_lane_f32(float32_t
>  }
>
>  // CHECK-LABEL: define double @test_vfmad_lane_f64(double %a, double %b,
> <1 x double> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
>  // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double
> [[EXTRACT]], double %a)
>  // CHECK:   ret double [[TMP2]]
>  float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
> @@ -166,9 +128,7 @@ float64_t test_vfmad_lane_f64(float64_t
>  }
>
>  // CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b,
> <2 x double> %c) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
>  // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double
> [[EXTRACT]], double %a)
>  // CHECK:   ret double [[TMP2]]
>  float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
> @@ -177,9 +137,7 @@ float64_t test_vfmad_laneq_f64(float64_t
>
>  // CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2
> x float> %c) #0 {
>  // CHECK:   [[SUB:%.*]] = fsub float -0.000000e+00, %b
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
>  // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float
> [[EXTRACT]], float %a)
>  // CHECK:   ret float [[TMP2]]
>  float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
> @@ -247,9 +205,7 @@ float64x1_t test_vfms_laneq_f64(float64x
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmullh_lane_s16(i16 %a, <4 x i16> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
>  // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -260,9 +216,7 @@ int32_t test_vqdmullh_lane_s16(int16_t a
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %b)
> #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
>  // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
>  // CHECK:   ret i64 [[VQDMULLS_S32_I]]
>  int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
> @@ -270,9 +224,7 @@ int64_t test_vqdmulls_lane_s32(int32_t a
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b)
> #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16
> [[VGETQ_LANE]], i64 0
>  // CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32>
> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -283,9 +235,7 @@ int32_t test_vqdmullh_laneq_s16(int16_t
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b)
> #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
>  // CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
>  // CHECK:   ret i64 [[VQDMULLS_S32_I]]
>  int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
> @@ -293,9 +243,7 @@ int64_t test_vqdmulls_laneq_s32(int32_t
>  }
>
>  // CHECK-LABEL: define i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %b)
> #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16
> [[VGET_LANE]], i64 0
>  // CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16>
> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -306,9 +254,7 @@ int16_t test_vqdmulhh_lane_s16(int16_t a
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmulhs_lane_s32(i32 %a, <2 x i32> %b)
> #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
>  // CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
>  // CHECK:   ret i32 [[VQDMULHS_S32_I]]
>  int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
> @@ -317,9 +263,7 @@ int32_t test_vqdmulhs_lane_s32(int32_t a
>
>
>  // CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b)
> #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16
> [[VGETQ_LANE]], i64 0
>  // CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16>
> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -331,9 +275,7 @@ int16_t test_vqdmulhh_laneq_s16(int16_t
>
>
>  // CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b)
> #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
>  // CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
>  // CHECK:   ret i32 [[VQDMULHS_S32_I]]
>  int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
> @@ -341,9 +283,7 @@ int32_t test_vqdmulhs_laneq_s32(int32_t
>  }
>
>  // CHECK-LABEL: define i16 @test_vqrdmulhh_lane_s16(i16 %a, <4 x i16> %b)
> #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16
> [[VGET_LANE]], i64 0
>  // CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16>
> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -354,9 +294,7 @@ int16_t test_vqrdmulhh_lane_s16(int16_t
>  }
>
>  // CHECK-LABEL: define i32 @test_vqrdmulhs_lane_s32(i32 %a, <2 x i32> %b)
> #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
>  // CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
>  // CHECK:   ret i32 [[VQRDMULHS_S32_I]]
>  int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
> @@ -365,9 +303,7 @@ int32_t test_vqrdmulhs_lane_s32(int32_t
>
>
>  // CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16
> [[VGETQ_LANE]], i64 0
>  // CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16>
> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -379,9 +315,7 @@ int16_t test_vqrdmulhh_laneq_s16(int16_t
>
>
>  // CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
>  // CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32
> @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
>  // CHECK:   ret i32 [[VQRDMULHS_S32_I]]
>  int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
> @@ -389,9 +323,7 @@ int32_t test_vqrdmulhs_laneq_s32(int32_t
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmlalh_lane_s16(i32 %a, i16 %b, <4 x
> i16> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]],
> i64 0
>  // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32>
> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -403,9 +335,7 @@ int32_t test_vqdmlalh_lane_s16(int32_t a
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmlals_lane_s32(i64 %a, i32 %b, <2 x
> i32> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
>  // CHECK:   [[VQDMLXL:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
>  // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64
> %a, i64 [[VQDMLXL]])
>  // CHECK:   ret i64 [[VQDMLXL1]]
> @@ -414,9 +344,7 @@ int64_t test_vqdmlals_lane_s32(int64_t a
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x
> i16> %c) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]],
> i64 0
>  // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32>
> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -428,9 +356,7 @@ int32_t test_vqdmlalh_laneq_s16(int32_t
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x
> i32> %c) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
>  // CHECK:   [[VQDMLXL:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
>  // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64
> %a, i64 [[VQDMLXL]])
>  // CHECK:   ret i64 [[VQDMLXL1]]
> @@ -439,9 +365,7 @@ int64_t test_vqdmlals_laneq_s32(int64_t
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmlslh_lane_s16(i32 %a, i16 %b, <4 x
> i16> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]],
> i64 0
>  // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32>
> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -453,9 +377,7 @@ int32_t test_vqdmlslh_lane_s16(int32_t a
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmlsls_lane_s32(i64 %a, i32 %b, <2 x
> i32> %c) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
>  // CHECK:   [[VQDMLXL:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
>  // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64
> %a, i64 [[VQDMLXL]])
>  // CHECK:   ret i64 [[VQDMLXL1]]
> @@ -464,9 +386,7 @@ int64_t test_vqdmlsls_lane_s32(int64_t a
>  }
>
>  // CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x
> i16> %c) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
>  // CHECK:   [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
>  // CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]],
> i64 0
>  // CHECK:   [[VQDMLXL:%.*]] = call <4 x i32>
> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
> @@ -478,9 +398,7 @@ int32_t test_vqdmlslh_laneq_s16(int32_t
>  }
>
>  // CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x
> i32> %c) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
>  // CHECK:   [[VQDMLXL:%.*]] = call i64
> @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
>  // CHECK:   [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64
> %a, i64 [[VQDMLXL]])
>  // CHECK:   ret i64 [[VQDMLXL1]]
> @@ -491,16 +409,10 @@ int64_t test_vqdmlsls_laneq_s32(int64_t
>  // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 {
>  // CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
>  // CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
> -// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
> -// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8>
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
> -// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
> +// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
> -// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
> -// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
>  // CHECK:   ret <1 x double> [[VSET_LANE]]
>  float64x1_t test_vmulx_lane_f64_0() {
>        float64x1_t arg1;
> @@ -517,16 +429,10 @@ float64x1_t test_vmulx_lane_f64_0() {
>  // CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
>  // CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
>  // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
> -// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
> -// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[SHUFFLE_I]] to <16 x i8>
> -// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
>  // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
> -// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
> -// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
>  // CHECK:   ret <1 x double> [[VSET_LANE]]
>  float64x1_t test_vmulx_laneq_f64_2() {
>        float64x1_t arg1;
>
> Modified: cfe/trunk/test/CodeGen/aarch64-neon-vget.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-vget.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-neon-vget.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-neon-vget.c Wed Oct  9 10:57:59 2019
> @@ -12,18 +12,14 @@ uint8_t test_vget_lane_u8(uint8x8_t a) {
>  }
>
>  // CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  uint16_t test_vget_lane_u16(uint16x4_t a) {
>    return vget_lane_u16(a, 3);
>  }
>
>  // CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  uint32_t test_vget_lane_u32(uint32x2_t a) {
>    return vget_lane_u32(a, 1);
> @@ -37,18 +33,14 @@ int8_t test_vget_lane_s8(int8x8_t a) {
>  }
>
>  // CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  int16_t test_vget_lane_s16(int16x4_t a) {
>    return vget_lane_s16(a, 3);
>  }
>
>  // CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  int32_t test_vget_lane_s32(int32x2_t a) {
>    return vget_lane_s32(a, 1);
> @@ -62,18 +54,14 @@ poly8_t test_vget_lane_p8(poly8x8_t a) {
>  }
>
>  // CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  poly16_t test_vget_lane_p16(poly16x4_t a) {
>    return vget_lane_p16(a, 3);
>  }
>
>  // CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
>  // CHECK:   ret float [[VGET_LANE]]
>  float32_t test_vget_lane_f32(float32x2_t a) {
>    return vget_lane_f32(a, 1);
> @@ -85,9 +73,7 @@ float32_t test_vget_lane_f32(float32x2_t
>  // CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
>  // CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
>  // CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
> -// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
>  // CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
>  // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
>  // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
> @@ -105,18 +91,14 @@ uint8_t test_vgetq_lane_u8(uint8x16_t a)
>  }
>
>  // CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  uint16_t test_vgetq_lane_u16(uint16x8_t a) {
>    return vgetq_lane_u16(a, 7);
>  }
>
>  // CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGETQ_LANE]]
>  uint32_t test_vgetq_lane_u32(uint32x4_t a) {
>    return vgetq_lane_u32(a, 3);
> @@ -130,18 +112,14 @@ int8_t test_vgetq_lane_s8(int8x16_t a) {
>  }
>
>  // CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  int16_t test_vgetq_lane_s16(int16x8_t a) {
>    return vgetq_lane_s16(a, 7);
>  }
>
>  // CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGETQ_LANE]]
>  int32_t test_vgetq_lane_s32(int32x4_t a) {
>    return vgetq_lane_s32(a, 3);
> @@ -155,18 +133,14 @@ poly8_t test_vgetq_lane_p8(poly8x16_t a)
>  }
>
>  // CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGETQ_LANE]]
>  poly16_t test_vgetq_lane_p16(poly16x8_t a) {
>    return vgetq_lane_p16(a, 7);
>  }
>
>  // CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
>  // CHECK:   ret float [[VGETQ_LANE]]
>  float32_t test_vgetq_lane_f32(float32x4_t a) {
>    return vgetq_lane_f32(a, 3);
> @@ -178,9 +152,7 @@ float32_t test_vgetq_lane_f32(float32x4_
>  // CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
>  // CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
>  // CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
> -// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
>  // CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
>  // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
>  // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
> @@ -191,36 +163,28 @@ float32_t test_vgetq_lane_f16(float16x8_
>  }
>
>  // CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  int64_t test_vget_lane_s64(int64x1_t a) {
>    return vget_lane_s64(a, 0);
>  }
>
>  // CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  uint64_t test_vget_lane_u64(uint64x1_t a) {
>    return vget_lane_u64(a, 0);
>  }
>
>  // CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGETQ_LANE]]
>  int64_t test_vgetq_lane_s64(int64x2_t a) {
>    return vgetq_lane_s64(a, 1);
>  }
>
>  // CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGETQ_LANE]]
>  uint64_t test_vgetq_lane_u64(uint64x2_t a) {
>    return vgetq_lane_u64(a, 1);
> @@ -235,18 +199,14 @@ uint8x8_t test_vset_lane_u8(uint8_t a, u
>  }
>
>  // CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
>    return vset_lane_u16(a, b, 3);
>  }
>
>  // CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
>  // CHECK:   ret <2 x i32> [[VSET_LANE]]
>  uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
>    return vset_lane_u32(a, b, 1);
> @@ -260,18 +220,14 @@ int8x8_t test_vset_lane_s8(int8_t a, int
>  }
>
>  // CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
>    return vset_lane_s16(a, b, 3);
>  }
>
>  // CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
>  // CHECK:   ret <2 x i32> [[VSET_LANE]]
>  int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
>    return vset_lane_s32(a, b, 1);
> @@ -285,18 +241,14 @@ poly8x8_t test_vset_lane_p8(poly8_t a, p
>  }
>
>  // CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
>    return vset_lane_p16(a, b, 3);
>  }
>
>  // CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x
> float> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float
> %a, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
>  // CHECK:   ret <2 x float> [[VSET_LANE]]
>  float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
>    return vset_lane_f32(a, b, 1);
> @@ -313,9 +265,7 @@ float32x2_t test_vset_lane_f32(float32_t
>  // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
>  // CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x
> i16>*
>  // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
> -// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
> -// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16
> [[TMP2]], i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 3
>  // CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]],
> align 8
>  // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x
> half>*
>  // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
> @@ -332,18 +282,14 @@ uint8x16_t test_vsetq_lane_u8(uint8_t a,
>  }
>
>  // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
>    return vsetq_lane_u16(a, b, 7);
>  }
>
>  // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
>    return vsetq_lane_u32(a, b, 3);
> @@ -357,18 +303,14 @@ int8x16_t test_vsetq_lane_s8(int8_t a, i
>  }
>
>  // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
>    return vsetq_lane_s16(a, b, 7);
>  }
>
>  // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
>    return vsetq_lane_s32(a, b, 3);
> @@ -382,18 +324,14 @@ poly8x16_t test_vsetq_lane_p8(poly8_t a,
>  }
>
>  // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
>    return vsetq_lane_p16(a, b, 7);
>  }
>
>  // CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x
> float> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float
> %a, i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
>  // CHECK:   ret <4 x float> [[VSET_LANE]]
>  float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
>    return vsetq_lane_f32(a, b, 3);
> @@ -410,9 +348,7 @@ float32x4_t test_vsetq_lane_f32(float32_
>  // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
>  // CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x
> i16>*
>  // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
> -// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16
> [[TMP2]], i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 7
>  // CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]],
> align 16
>  // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x
> half>*
>  // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
> @@ -422,36 +358,28 @@ float16x8_t test_vsetq_lane_f16(float16_
>  }
>
>  // CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a,
> i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
>    return vset_lane_s64(a, b, 0);
>  }
>
>  // CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64>
> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a,
> i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
>    return vset_lane_u64(a, b, 0);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
>    return vsetq_lane_s64(a, b, 1);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64>
> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
>    return vsetq_lane_u64(a, b, 1);
>
> Modified: cfe/trunk/test/CodeGen/aarch64-poly64.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-poly64.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-poly64.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-poly64.c Wed Oct  9 10:57:59 2019
> @@ -61,48 +61,36 @@ poly64x2_t test_vbslq_p64(poly64x2_t a,
>  }
>
>  // CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  poly64_t test_vget_lane_p64(poly64x1_t v) {
>    return vget_lane_p64(v, 0);
>  }
>
>  // CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
>  // CHECK:   ret i64 [[VGETQ_LANE]]
>  poly64_t test_vgetq_lane_p64(poly64x2_t v) {
>    return vgetq_lane_p64(v, 1);
>  }
>
>  // CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64>
> %v) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a,
> i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
>    return vset_lane_p64(a, v, 0);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64>
> %v) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
>    return vsetq_lane_p64(a, v, 1);
>  }
>
>  // CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x
> i64> %b) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> -// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64
> [[VGET_LANE]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
>    return vcopy_lane_p64(a, 0, b, 0);
> @@ -110,24 +98,16 @@ poly64x1_t test_vcopy_lane_p64(poly64x1_
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x
> i64> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64
> [[VGET_LANE]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
>    return vcopyq_lane_p64(a, 1, b, 0);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2
> x i64> %b) #1 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64
> [[VGETQ_LANE]], i32 1
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
>    return vcopyq_laneq_p64(a, 1, b, 1);
>
> Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original)
> +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Wed Oct  9 10:57:59 2019
> @@ -960,9 +960,7 @@ float16x8_t test_vfmaq_n_f16(float16x8_t
>  }
>
>  // CHECK-LABEL: test_vfmah_lane_f16
> -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8>
> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
> +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
>  // CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]],
> half %a)
>  // CHECK: ret half [[FMA]]
>  float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
> @@ -970,9 +968,7 @@ float16_t test_vfmah_lane_f16(float16_t
>  }
>
>  // CHECK-LABEL: test_vfmah_laneq_f16
> -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8>
> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
> +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
>  // CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]],
> half %a)
>  // CHECK: ret half [[FMA]]
>  float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
> @@ -1071,9 +1067,7 @@ float16x8_t test_vfmsq_n_f16(float16x8_t
>  // CHECK: [[TMP0:%.*]] = fpext half %b to float
>  // CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
>  // CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
> -// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3
> +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
>  // CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half
> [[EXTR]], half %a)
>  // CHECK: ret half [[FMA]]
>  float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
> @@ -1084,9 +1078,7 @@ float16_t test_vfmsh_lane_f16(float16_t
>  // CHECK: [[TMP0:%.*]] = fpext half %b to float
>  // CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
>  // CHECK: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
> -// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7
> +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
>  // CHECK: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half
> [[EXTR]], half %a)
>  // CHECK: ret half [[FMA]]
>  float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
> @@ -1231,9 +1223,7 @@ float16x8_t test_vmulxq_n_f16(float16x8_
>  }
>
>  // CHECK-LABEL: test_vmulxh_lane_f16
> -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
> +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %b, i32 3
>  // CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a,
> half [[EXTR]]
>  // CHECK: ret half [[MULX]]
>  float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
> @@ -1241,9 +1231,7 @@ float16_t test_vmulxh_lane_f16(float16_t
>  }
>
>  // CHECK-LABEL: test_vmulxh_laneq_f16
> -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
> +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %b, i32 7
>  // CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a,
> half [[EXTR]])
>  // CHECK: ret half [[MULX]]
>  float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
>
> Modified: cfe/trunk/test/CodeGen/arm64-lanes.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64-lanes.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm64-lanes.c (original)
> +++ cfe/trunk/test/CodeGen/arm64-lanes.c Wed Oct  9 10:57:59 2019
> @@ -9,7 +9,7 @@ int8_t test_vdupb_lane_s8(int8x8_t src)
>    // CHECK: extractelement <8 x i8> %src, i32 2
>
>    // CHECK-BE-LABEL: @test_vdupb_lane_s8
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32>
> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
>    // CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
>  }
>
> @@ -19,109 +19,83 @@ uint8_t test_vdupb_lane_u8(uint8x8_t src
>    // CHECK: extractelement <8 x i8> %src, i32 2
>
>    // CHECK-BE-LABEL: @test_vdupb_lane_u8
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32>
> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
>    // CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
>  }
>
>  int16_t test_vduph_lane_s16(int16x4_t src) {
>    return vduph_lane_s16(src, 2);
>    // CHECK-LABEL: @test_vduph_lane_s16
> -  // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
> -  // CHECK: extractelement <4 x i16> [[TMP2]], i32 2
> +  // CHECK: extractelement <4 x i16> %src, i32 2
>
>    // CHECK-BE-LABEL: @test_vduph_lane_s16
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32>
> <i32 3, i32 2, i32 1, i32 0>
> -  // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
> -  // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
> -  // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> +  // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
>  }
>
>  uint16_t test_vduph_lane_u16(uint16x4_t src) {
>    return vduph_lane_u16(src, 2);
>    // CHECK-LABEL: @test_vduph_lane_u16
> -  // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
> -  // CHECK: extractelement <4 x i16> [[TMP2]], i32 2
> +  // CHECK: extractelement <4 x i16> %src, i32 2
>
>    // CHECK-BE-LABEL: @test_vduph_lane_u16
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32>
> <i32 3, i32 2, i32 1, i32 0>
> -  // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
> -  // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
> -  // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> +  // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
>  }
>
>  int32_t test_vdups_lane_s32(int32x2_t src) {
>    return vdups_lane_s32(src, 0);
>    // CHECK-LABEL: @test_vdups_lane_s32
> -  // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
> -  // CHECK: extractelement <2 x i32> [[TMP2]], i32 0
> +  // CHECK: extractelement <2 x i32> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdups_lane_s32
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32>
> <i32 1, i32 0>
> -  // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
> -  // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
> -  // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
> +  // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
>  }
>
>  uint32_t test_vdups_lane_u32(uint32x2_t src) {
>    return vdups_lane_u32(src, 0);
>    // CHECK-LABEL: @test_vdups_lane_u32
> -  // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
> -  // CHECK: extractelement <2 x i32> [[TMP2]], i32 0
> +  // CHECK: extractelement <2 x i32> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdups_lane_u32
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32>
> <i32 1, i32 0>
> -  // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
> -  // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
> -  // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
> +  // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
>  }
>
>  float32_t test_vdups_lane_f32(float32x2_t src) {
>    return vdups_lane_f32(src, 0);
>    // CHECK-LABEL: @test_vdups_lane_f32
> -  // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
> -  // CHECK: extractelement <2 x float> [[TMP2]], i32 0
> +  // CHECK: extractelement <2 x float> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdups_lane_f32
> -  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> {{.*}}, <2 x i32>
> <i32 1, i32 0>
> -  // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x float> [[REV]] to [[TYPE:.*]]
> -  // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
> -  // CHECK-BE: extractelement <2 x float> [[TMP2]], i32 0
> +  // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> <i32 1, i32 0>
> +  // CHECK-BE: extractelement <2 x float> [[REV]], i32 0
>  }
>
>  int64_t test_vdupd_lane_s64(int64x1_t src) {
>    return vdupd_lane_s64(src, 0);
>    // CHECK-LABEL: @test_vdupd_lane_s64
> -  // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
> -  // CHECK: extractelement <1 x i64> [[TMP2]], i32 0
> +  // CHECK: extractelement <1 x i64> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdupd_lane_s64
> -  // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
> +  // CHECK-BE: extractelement <1 x i64> %src, i32 0
>  }
>
>  uint64_t test_vdupd_lane_u64(uint64x1_t src) {
>    return vdupd_lane_u64(src, 0);
>    // CHECK-LABEL: @test_vdupd_lane_u64
> -  // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
> -  // CHECK: extractelement <1 x i64> [[TMP2]], i32 0
> +  // CHECK: extractelement <1 x i64> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdupd_lane_u64
> -  // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
> +  // CHECK-BE: extractelement <1 x i64> %src, i32 0
>  }
>
>  float64_t test_vdupd_lane_f64(float64x1_t src) {
>    return vdupd_lane_f64(src, 0);
>    // CHECK-LABEL: @test_vdupd_lane_f64
> -  // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %src to [[TYPE:.*]]
> -  // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x double>
> -  // CHECK: extractelement <1 x double> [[TMP2]], i32 0
> +  // CHECK: extractelement <1 x double> %src, i32 0
>
>    // CHECK-BE-LABEL: @test_vdupd_lane_f64
> -  // CHECK-BE: extractelement <1 x double> {{.*}}, i32 0
> +  // CHECK-BE: extractelement <1 x double> %src, i32 0
>  }
>
> Modified: cfe/trunk/test/CodeGen/arm64_vcopy.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64_vcopy.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm64_vcopy.c (original)
> +++ cfe/trunk/test/CodeGen/arm64_vcopy.c Wed Oct  9 10:57:59 2019
> @@ -22,12 +22,8 @@ uint8x16_t test_vcopyq_laneq_u8(uint8x16
>  }
>
>  // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_s16(<8 x i16> %a1, <8
> x i16> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> -// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16
> [[VGETQ_LANE]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  int16x8_t test_vcopyq_laneq_s16(int16x8_t a1, int16x8_t a2) {
>    return vcopyq_laneq_s16(a1, (int64_t) 3, a2, (int64_t) 7);
> @@ -35,12 +31,8 @@ int16x8_t test_vcopyq_laneq_s16(int16x8_
>  }
>
>  // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_u16(<8 x i16> %a1, <8
> x i16> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> -// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16
> [[VGETQ_LANE]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16
> [[VGETQ_LANE]], i32 3
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  uint16x8_t test_vcopyq_laneq_u16(uint16x8_t a1, uint16x8_t a2) {
>    return vcopyq_laneq_u16(a1, (int64_t) 3, a2, (int64_t) 7);
> @@ -48,72 +40,48 @@ uint16x8_t test_vcopyq_laneq_u16(uint16x
>  }
>
>  // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_s32(<4 x i32> %a1, <4
> x i32> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> -// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32
> [[VGETQ_LANE]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32
> [[VGETQ_LANE]], i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  int32x4_t test_vcopyq_laneq_s32(int32x4_t a1, int32x4_t a2) {
>    return vcopyq_laneq_s32(a1, (int64_t) 3, a2, (int64_t) 3);
>  }
>
>  // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_u32(<4 x i32> %a1, <4
> x i32> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> -// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32
> [[VGETQ_LANE]], i32 3
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32
> [[VGETQ_LANE]], i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  uint32x4_t test_vcopyq_laneq_u32(uint32x4_t a1, uint32x4_t a2) {
>    return vcopyq_laneq_u32(a1, (int64_t) 3, a2, (int64_t) 3);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_s64(<2 x i64> %a1, <2
> x i64> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64
> [[VGETQ_LANE]], i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64
> [[VGETQ_LANE]], i32 0
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  int64x2_t test_vcopyq_laneq_s64(int64x2_t a1, int64x2_t a2) {
>    return vcopyq_laneq_s64(a1, (int64_t) 0, a2, (int64_t) 1);
>  }
>
>  // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_u64(<2 x i64> %a1, <2
> x i64> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64
> [[VGETQ_LANE]], i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64
> [[VGETQ_LANE]], i32 0
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  uint64x2_t test_vcopyq_laneq_u64(uint64x2_t a1, uint64x2_t a2) {
>    return vcopyq_laneq_u64(a1, (int64_t) 0, a2, (int64_t) 1);
>  }
>
>  // CHECK-LABEL: define <4 x float> @test_vcopyq_laneq_f32(<4 x float>
> %a1, <4 x float> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32
> 3
> -// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP3]], float
> [[VGETQ_LANE]], i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a2, i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %a1, float
> [[VGETQ_LANE]], i32 0
>  // CHECK:   ret <4 x float> [[VSET_LANE]]
>  float32x4_t test_vcopyq_laneq_f32(float32x4_t a1, float32x4_t a2) {
>    return vcopyq_laneq_f32(a1, 0, a2, 3);
>  }
>
>  // CHECK-LABEL: define <2 x double> @test_vcopyq_laneq_f64(<2 x double>
> %a1, <2 x double> %a2) #0 {
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a2 to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
> -// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]],
> i32 1
> -// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %a1 to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x double> [[TMP3]],
> double [[VGETQ_LANE]], i32 0
> +// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a2, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x double> %a1, double
> [[VGETQ_LANE]], i32 0
>  // CHECK:   ret <2 x double> [[VSET_LANE]]
>  float64x2_t test_vcopyq_laneq_f64(float64x2_t a1, float64x2_t a2) {
>    return vcopyq_laneq_f64(a1, 0, a2, 1);
>
> Modified: cfe/trunk/test/CodeGen/arm_neon_intrinsics.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm_neon_intrinsics.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm_neon_intrinsics.c (original)
> +++ cfe/trunk/test/CodeGen/arm_neon_intrinsics.c Wed Oct  9 10:57:59 2019
> @@ -3319,18 +3319,14 @@ uint8_t test_vget_lane_u8(uint8x8_t a) {
>  }
>
>  // CHECK-LABEL: @test_vget_lane_u16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  uint16_t test_vget_lane_u16(uint16x4_t a) {
>    return vget_lane_u16(a, 3);
>  }
>
>  // CHECK-LABEL: @test_vget_lane_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  uint32_t test_vget_lane_u32(uint32x2_t a) {
>    return vget_lane_u32(a, 1);
> @@ -3344,18 +3340,14 @@ int8_t test_vget_lane_s8(int8x8_t a) {
>  }
>
>  // CHECK-LABEL: @test_vget_lane_s16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  int16_t test_vget_lane_s16(int16x4_t a) {
>    return vget_lane_s16(a, 3);
>  }
>
>  // CHECK-LABEL: @test_vget_lane_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
>  // CHECK:   ret i32 [[VGET_LANE]]
>  int32_t test_vget_lane_s32(int32x2_t a) {
>    return vget_lane_s32(a, 1);
> @@ -3369,18 +3361,14 @@ poly8_t test_vget_lane_p8(poly8x8_t a) {
>  }
>
>  // CHECK-LABEL: @test_vget_lane_p16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
>  // CHECK:   ret i16 [[VGET_LANE]]
>  poly16_t test_vget_lane_p16(poly16x4_t a) {
>    return vget_lane_p16(a, 3);
>  }
>
>  // CHECK-LABEL: @test_vget_lane_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
>  // CHECK:   ret float [[VGET_LANE]]
>  float32_t test_vget_lane_f32(float32x2_t a) {
>    return vget_lane_f32(a, 1);
> @@ -3392,9 +3380,7 @@ float32_t test_vget_lane_f32(float32x2_t
>  // CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
>  // CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x
> i16>*
>  // CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
> -// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
>  // CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
>  // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
>  // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
> @@ -3412,18 +3398,14 @@ uint8_t test_vgetq_lane_u8(uint8x16_t a)
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_u16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGET_LANE]]
>  uint16_t test_vgetq_lane_u16(uint16x8_t a) {
>    return vgetq_lane_u16(a, 7);
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGET_LANE]]
>  uint32_t test_vgetq_lane_u32(uint32x4_t a) {
>    return vgetq_lane_u32(a, 3);
> @@ -3437,18 +3419,14 @@ int8_t test_vgetq_lane_s8(int8x16_t a) {
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_s16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGET_LANE]]
>  int16_t test_vgetq_lane_s16(int16x8_t a) {
>    return vgetq_lane_s16(a, 7);
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
>  // CHECK:   ret i32 [[VGET_LANE]]
>  int32_t test_vgetq_lane_s32(int32x4_t a) {
>    return vgetq_lane_s32(a, 3);
> @@ -3462,18 +3440,14 @@ poly8_t test_vgetq_lane_p8(poly8x16_t a)
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_p16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
>  // CHECK:   ret i16 [[VGET_LANE]]
>  poly16_t test_vgetq_lane_p16(poly16x8_t a) {
>    return vgetq_lane_p16(a, 7);
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
>  // CHECK:   ret float [[VGET_LANE]]
>  float32_t test_vgetq_lane_f32(float32x4_t a) {
>    return vgetq_lane_f32(a, 3);
> @@ -3485,9 +3459,7 @@ float32_t test_vgetq_lane_f32(float32x4_
>  // CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
>  // CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x
> i16>*
>  // CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
> -// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
> -// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
>  // CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
>  // CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
>  // CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
> @@ -3498,36 +3470,28 @@ float32_t test_vgetq_lane_f16(float16x8_
>  }
>
>  // CHECK-LABEL: @test_vget_lane_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  int64_t test_vget_lane_s64(int64x1_t a) {
>    return vget_lane_s64(a, 0);
>  }
>
>  // CHECK-LABEL: @test_vget_lane_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
>  // CHECK:   ret i64 [[VGET_LANE]]
>  uint64_t test_vget_lane_u64(uint64x1_t a) {
>    return vget_lane_u64(a, 0);
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGET_LANE]]
>  int64_t test_vgetq_lane_s64(int64x2_t a) {
>    return vgetq_lane_s64(a, 1);
>  }
>
>  // CHECK-LABEL: @test_vgetq_lane_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
> +// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
>  // CHECK:   ret i64 [[VGET_LANE]]
>  uint64_t test_vgetq_lane_u64(uint64x2_t a) {
>    return vgetq_lane_u64(a, 1);
> @@ -14061,18 +14025,14 @@ uint8x8_t test_vset_lane_u8(uint8_t a, u
>  }
>
>  // CHECK-LABEL: @test_vset_lane_u16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
>    return vset_lane_u16(a, b, 3);
>  }
>
>  // CHECK-LABEL: @test_vset_lane_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
>  // CHECK:   ret <2 x i32> [[VSET_LANE]]
>  uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
>    return vset_lane_u32(a, b, 1);
> @@ -14086,18 +14046,14 @@ int8x8_t test_vset_lane_s8(int8_t a, int
>  }
>
>  // CHECK-LABEL: @test_vset_lane_s16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
>    return vset_lane_s16(a, b, 3);
>  }
>
>  // CHECK-LABEL: @test_vset_lane_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
>  // CHECK:   ret <2 x i32> [[VSET_LANE]]
>  int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
>    return vset_lane_s32(a, b, 1);
> @@ -14111,18 +14067,14 @@ poly8x8_t test_vset_lane_p8(poly8_t a, p
>  }
>
>  // CHECK-LABEL: @test_vset_lane_p16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
>  // CHECK:   ret <4 x i16> [[VSET_LANE]]
>  poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
>    return vset_lane_p16(a, b, 3);
>  }
>
>  // CHECK-LABEL: @test_vset_lane_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float
> %a, i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a,
> i32 1
>  // CHECK:   ret <2 x float> [[VSET_LANE]]
>  float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
>    return vset_lane_f32(a, b, 1);
> @@ -14139,9 +14091,7 @@ float32x2_t test_vset_lane_f32(float32_t
>  // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
>  // CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x
> i16>*
>  // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
> -// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
> -// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16
> [[TMP2]], i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16
> [[TMP2]], i32 1
>  // CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]],
> align 8
>  // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x
> half>*
>  // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
> @@ -14158,18 +14108,14 @@ uint8x16_t test_vsetq_lane_u8(uint8_t a,
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_u16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
>    return vsetq_lane_u16(a, b, 7);
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_u32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
>    return vsetq_lane_u32(a, b, 3);
> @@ -14183,18 +14129,14 @@ int8x16_t test_vsetq_lane_s8(int8_t a, i
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_s16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
>    return vsetq_lane_s16(a, b, 7);
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_s32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a,
> i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
>  // CHECK:   ret <4 x i32> [[VSET_LANE]]
>  int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
>    return vsetq_lane_s32(a, b, 3);
> @@ -14208,18 +14150,14 @@ poly8x16_t test_vsetq_lane_p8(poly8_t a,
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_p16(
> -// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a,
> i32 7
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
>  // CHECK:   ret <8 x i16> [[VSET_LANE]]
>  poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
>    return vsetq_lane_p16(a, b, 7);
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_f32(
> -// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float
> %a, i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a,
> i32 3
>  // CHECK:   ret <4 x float> [[VSET_LANE]]
>  float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
>    return vsetq_lane_f32(a, b, 3);
> @@ -14236,9 +14174,7 @@ float32x4_t test_vsetq_lane_f32(float32_
>  // CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
>  // CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x
> i16>*
>  // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
> -// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
> -// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16
> [[TMP2]], i32 3
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16
> [[TMP2]], i32 3
>  // CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]],
> align 16
>  // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x
> half>*
>  // CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
> @@ -14248,36 +14184,28 @@ float16x8_t test_vsetq_lane_f16(float16_
>  }
>
>  // CHECK-LABEL: @test_vset_lane_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a,
> i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
>    return vset_lane_s64(a, b, 0);
>  }
>
>  // CHECK-LABEL: @test_vset_lane_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a,
> i32 0
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
>  // CHECK:   ret <1 x i64> [[VSET_LANE]]
>  uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
>    return vset_lane_u64(a, b, 0);
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_s64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
>    return vsetq_lane_s64(a, b, 1);
>  }
>
>  // CHECK-LABEL: @test_vsetq_lane_u64(
> -// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
> -// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
> -// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a,
> i32 1
> +// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
>  // CHECK:   ret <2 x i64> [[VSET_LANE]]
>  uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
>    return vsetq_lane_u64(a, b, 1);
>
> Modified: cfe/trunk/test/Headers/arm-neon-header.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Headers/arm-neon-header.c?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/Headers/arm-neon-header.c (original)
> +++ cfe/trunk/test/Headers/arm-neon-header.c Wed Oct  9 10:57:59 2019
> @@ -20,4 +20,7 @@
>  // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++
> --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s
>  // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++
> --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s
>
> +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding
> --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc
> -flax-vector-conversions=none %s
> +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding
> --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc
> -flax-vector-conversions=none %s
> +
>  #include <arm_neon.h>
>
> Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=374191&r1=374190&r2=374191&view=diff
>
> ==============================================================================
> --- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
> +++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Oct  9 10:57:59 2019
> @@ -636,7 +636,7 @@ std::string Type::builtin_str() const {
>      default: llvm_unreachable("Unhandled case!");
>      }
>
> -  if (isChar() && !Pointer)
> +  if (isChar() && !Pointer && Signed)
>      // Make chars explicitly signed.
>      S = "S" + S;
>    else if (isInteger() && !Pointer && !Signed)
> @@ -1442,7 +1442,7 @@ void Intrinsic::emitBodyAsBuiltinCall()
>      }
>
>      // Check if an explicit cast is needed.
> -    if (CastToType.isVector()) {
> +    if (CastToType.isVector() && LocalCK == ClassB) {
>        CastToType.makeInteger(8, true);
>        Arg = "(" + CastToType.str() + ")" + Arg;
>      }
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20191009/0d89238f/attachment-0001.html>


More information about the cfe-commits mailing list