[clang] [llvm] [AArch64][NEON] Fix NEON intrinsic compilation error under the -fno-lax-vector-conversions flag (PR #149329)
Amina Chabane via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 01:57:12 PDT 2025
https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/149329
From 2895e5e7b56c1c611b39a5c85de92d18f3aae71a Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Tue, 15 Jul 2025 15:56:49 +0000
Subject: [PATCH 1/7] [AArch64][NEON] Fix poly lane intrinsics under
-fno-lax-vector-conversions. Issue originally raised in
https://github.com/llvm/llvm-project/issues/71362#issuecomment-3028515618.
Certain NEON intrinsics that operate on poly types (e.g. poly8x8_t) failed to
compile with the -fno-lax-vector-conversions flag. This patch updates
NeonEmitter.cpp to insert an explicit __builtin_bit_cast from poly types to
the required signed integer vector types when generating lane-based
intrinsics. A test, neon-bitcast-poly.ll, is included.
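
For context, a minimal reproducer of the failure looks roughly like this
(a hypothetical sketch in the spirit of the linked issue; the function name
is illustrative):

  // Compile with: clang --target=aarch64-linux-gnu -c \
  //   -flax-vector-conversions=none repro.c
  // Before this patch, lane intrinsics on poly types were rejected because
  // the generated wrapper passed a poly vector where the NEON builtin
  // expected a signed integer vector.
  #include <arm_neon.h>

  poly8x8_t set_first_lane(poly8_t val, poly8x8_t vec) {
    return vset_lane_p8(val, vec, 0);
  }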
---
clang/utils/TableGen/NeonEmitter.cpp | 10 +++-
.../test/CodeGen/AArch64/neon-bitcast-poly.ll | 51 +++++++++++++++++++
2 files changed, 60 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 409f1c4f71834..574a29d0e4dd9 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,12 +1401,20 @@ void Intrinsic::emitBodyAsBuiltinCall() {
if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ }
+ else if ((T.isPoly() ||
+ (T.isInteger() && !T.isSigned() &&
+ StringRef(Name).contains("_p8")) ||
+ StringRef(Name).contains("_p16") ||
+ StringRef(Name).contains("_p64"))) {
+ CastToType.makeSigned();
+ Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ }
} else if (LocalCK == ClassI) {
if (CastToType.isInteger()) {
CastToType.makeSigned();
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
}
- }
}
S += Arg + ", ";
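
The effect on the generated arm_neon.h body is roughly as follows (an
illustrative sketch, not the literal TableGen output; the real header wraps
the call in a statement-expression macro, and the "my_" prefix marks this as
a mock-up):

  #include <arm_neon.h>

  // Illustrative: the poly vector operand is bit_cast to the signed vector
  // type the builtin expects, and the result is bit_cast back to poly.
  #define my_vset_lane_p8(__p0, __p1, __p2)                  \
    __builtin_bit_cast(poly8x8_t,                            \
        __builtin_neon_vset_lane_i8((int8_t)(__p0),          \
            __builtin_bit_cast(int8x8_t, (__p1)), (__p2)))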
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
new file mode 100644
index 0000000000000..b577eb1e34b09
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
+; after bitcasting to signed integer vectors. These intrinsics would previously fail under -fno-lax-vector-conversions.
+
+define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+entry:
+ %vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %vset_lane
+}
+
+define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
+; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[0], w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+entry:
+ %vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
+ ret <4 x i16> %vset_lane
+}
+
+define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
+; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %vget_lane = extractelement <1 x i64> %vec, i64 0
+ ret i64 %vget_lane
+}
+
+define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
+; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v0.b[0], w0
+; CHECK-NEXT: ret
+entry:
+ %vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
+ ret <16 x i8> %vset_lane
+}
From c300ab6ced97df16728fac0a07c94e38792a2047 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Wed, 16 Jul 2025 13:53:30 +0000
Subject: [PATCH 2/7] Added isVector() condition to avoid scalar constants.
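
The guard matters because lane intrinsics mix vector and scalar operands, and
only the former should be rewritten. A hypothetical example:

  #include <arm_neon.h>

  // Only the vector operand (vec) is bit_cast; the scalar poly8_t value and
  // the constant lane index 3 must pass through unchanged, which is what the
  // isVector() check ensures.
  poly8x8_t set_lane3(poly8_t val, poly8x8_t vec) {
    return vset_lane_p8(val, vec, 3);
  }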
---
clang/utils/TableGen/NeonEmitter.cpp | 23 ++++++++++-------------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 574a29d0e4dd9..d3dd1c5589920 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,22 +1401,19 @@ void Intrinsic::emitBodyAsBuiltinCall() {
if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
+ (StringRef(Name).contains("_p8") ||
+ StringRef(Name).contains("_p16") ||
+ StringRef(Name).contains("_p64"))))) {
+ CastToType.makeSigned();
+ Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ } else if (LocalCK == ClassI && CastToType.isInteger()) {
+ CastToType.makeSigned();
+ Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
}
- else if ((T.isPoly() ||
- (T.isInteger() && !T.isSigned() &&
- StringRef(Name).contains("_p8")) ||
- StringRef(Name).contains("_p16") ||
- StringRef(Name).contains("_p64"))) {
- CastToType.makeSigned();
- Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
- }
- } else if (LocalCK == ClassI) {
- if (CastToType.isInteger()) {
- CastToType.makeSigned();
- Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
- }
}
+
S += Arg + ", ";
}
From 102ca6f20dac9e2c5a458ee5e637e517f242c949 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Thu, 17 Jul 2025 14:42:43 +0000
Subject: [PATCH 3/7] Newline deletion
---
clang/utils/TableGen/NeonEmitter.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d3dd1c5589920..1bd8c8b58c396 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1402,9 +1402,9 @@ void Intrinsic::emitBodyAsBuiltinCall() {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
} else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
- (StringRef(Name).contains("_p8") ||
- StringRef(Name).contains("_p16") ||
- StringRef(Name).contains("_p64"))))) {
+ (StringRef(Name).contains("_p8") ||
+ StringRef(Name).contains("_p16") ||
+ StringRef(Name).contains("_p64"))))) {
CastToType.makeSigned();
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
} else if (LocalCK == ClassI && CastToType.isInteger()) {
@@ -1412,8 +1412,6 @@ void Intrinsic::emitBodyAsBuiltinCall() {
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
}
}
-
-
S += Arg + ", ";
}
From 7106ac95552f7bb32321cbc7b6d5e9df3eec578b Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Tue, 22 Jul 2025 08:43:47 +0000
Subject: [PATCH 4/7] Code formatting change
---
clang/utils/TableGen/NeonEmitter.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 1bd8c8b58c396..da3bbd4303074 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,10 +1401,11 @@ void Intrinsic::emitBodyAsBuiltinCall() {
if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
- } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
- (StringRef(Name).contains("_p8") ||
- StringRef(Name).contains("_p16") ||
- StringRef(Name).contains("_p64"))))) {
+ } else if ((T.isPoly() ||
+ (T.isVector() && T.isInteger() && !T.isSigned() &&
+ (StringRef(Name).contains("_p8") ||
+ StringRef(Name).contains("_p16") ||
+ StringRef(Name).contains("_p64"))))) {
CastToType.makeSigned();
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
} else if (LocalCK == ClassI && CastToType.isInteger()) {
From 6b12c80ca200872f7e9e9f4afac5d42b31d9b349 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Fri, 25 Jul 2025 14:23:29 +0000
Subject: [PATCH 5/7] - Added neon-bitcast-poly.c test - Amended check
---
.../test/CodeGen/AArch64/neon-bitcast-poly.c | 247 ++++++++++++++++++
clang/utils/TableGen/NeonEmitter.cpp | 10 +-
.../test/CodeGen/AArch64/neon-bitcast-poly.ll | 51 ----
3 files changed, 249 insertions(+), 59 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/neon-bitcast-poly.c
delete mode 100644 llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
diff --git a/clang/test/CodeGen/AArch64/neon-bitcast-poly.c b/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
new file mode 100644
index 0000000000000..6d619ce62d7ed
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
@@ -0,0 +1,247 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=instcombine | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vdupb_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A:%.*]], i64 1
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
+poly8_t test_vdupb_lane_p8(poly8x8_t a){
+ return vdupb_lane_p8(a, 1);
+}
+
+// CHECK-LABEL: @test_vdupb_laneq_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[A:%.*]], i64 5
+// CHECK-NEXT: ret i8 [[VGETQ_LANE]]
+//
+poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
+ return vdupb_laneq_p8(a, 5);
+}
+
+// CHECK-LABEL: @test_vset_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t v){
+ return vset_lane_p8(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vset_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t v){
+ return vset_lane_p16(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vset_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> poison, i64 [[A:%.*]], i64 0
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
+poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v){
+ return vset_lane_p64(a, v, 0);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t v){
+ return vsetq_lane_p8(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t v){
+ return vsetq_lane_p16(a, v, 3);
+}
+
+// CHECK-LABEL: @test_vsetq_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[V:%.*]], i64 [[A:%.*]], i64 0
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v){
+ return vsetq_lane_p64(a, v, 0);
+}
+
+// CHECK-LABEL: @test_vget_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[V:%.*]], i64 2
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
+poly8_t test_vget_lane_p8(poly8x8_t v){
+ return vget_lane_p8(v, 2);
+}
+
+// CHECK-LABEL: @test_vget_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[V:%.*]], i64 2
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
+poly16_t test_vget_lane_p16(poly16x4_t v){
+ return vget_lane_p16(v, 2);
+}
+
+// CHECK-LABEL: @test_vget_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[V:%.*]], i64 0
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
+poly64_t test_vget_lane_p64(poly64x1_t v){
+ return vget_lane_p64(v, 0);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[V:%.*]], i64 2
+// CHECK-NEXT: ret i8 [[VGETQ_LANE]]
+//
+poly8_t test_vgetq_lane_p8(poly8x16_t v){
+ return vgetq_lane_p8(v, 2);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[V:%.*]], i64 2
+// CHECK-NEXT: ret i16 [[VGETQ_LANE]]
+//
+poly16_t test_vgetq_lane_p16(poly16x8_t v){
+ return vgetq_lane_p16(v, 2);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[V:%.*]], i64 0
+// CHECK-NEXT: ret i64 [[VGETQ_LANE]]
+//
+poly64_t test_vgetq_lane_p64(poly64x2_t v){
+ return vgetq_lane_p64(v, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vcopy_lane_p8(poly8x8_t a, poly8x8_t b) {
+ return vcopy_lane_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> [[A:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vcopy_lane_p16(poly16x4_t a, poly16x4_t b) {
+ return vcopy_lane_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret <1 x i64> [[B:%.*]]
+//
+poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
+ return vcopy_lane_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vcopyq_lane_p8(poly8x16_t a, poly8x8_t b){
+ return vcopyq_lane_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vcopyq_lane_p16(poly16x8_t a, poly16x4_t b){
+ return vcopyq_lane_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_lane_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b){
+ return vcopyq_lane_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[B:%.*]], i64 0
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[A:%.*]], i8 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
+poly8x8_t test_vcopy_laneq_p8(poly8x8_t a, poly8x16_t b){
+ return vcopy_laneq_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i64 0
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[A:%.*]], i16 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
+poly16x4_t test_vcopy_laneq_p16(poly16x4_t a, poly16x8_t b){
+ return vcopy_laneq_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopy_laneq_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
+poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t b){
+ return vcopy_laneq_p64(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
+poly8x16_t test_vcopyq_laneq_p8(poly8x16_t a, poly8x16_t b){
+ return vcopyq_laneq_p8(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
+poly16x8_t test_vcopyq_laneq_p16(poly16x8_t a, poly16x8_t b){
+ return vcopyq_laneq_p16(a, 0, b, 0);
+}
+
+// CHECK-LABEL: @test_vcopyq_laneq_p64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
+poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b){
+ return vcopyq_laneq_p64(a, 0, b, 0);
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index da3bbd4303074..946a799a4f6a5 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,14 +1401,8 @@ void Intrinsic::emitBodyAsBuiltinCall() {
if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
- } else if ((T.isPoly() ||
- (T.isVector() && T.isInteger() && !T.isSigned() &&
- (StringRef(Name).contains("_p8") ||
- StringRef(Name).contains("_p16") ||
- StringRef(Name).contains("_p64"))))) {
- CastToType.makeSigned();
- Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
- } else if (LocalCK == ClassI && CastToType.isInteger()) {
+ } else if (LocalCK == ClassI &&
+ (CastToType.isInteger() || CastToType.isPoly())) {
CastToType.makeSigned();
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
}
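
With the condition reduced to ClassI plus an integer-or-poly cast type,
read-only lane intrinsics take the same makeSigned() bit_cast path. A sketch
of the kind of code this lets through, mirroring the tests above:

  #include <arm_neon.h>

  // Compiles cleanly under -flax-vector-conversions=none once poly cast
  // types take the same signed bit_cast route as plain integer vectors.
  poly16_t get_lane2(poly16x4_t v) {
    return vget_lane_p16(v, 2);
  }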
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
deleted file mode 100644
index b577eb1e34b09..0000000000000
--- a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
-
-; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
-; after bitcasting to signed integer vectors. These intrinsics would previously fail under -fno-lax-vector-conversions.
-
-define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.b[0], v1.b[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
-entry:
- %vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i8> %vset_lane
-}
-
-define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
-; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov v0.h[0], w0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
-entry:
- %vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
- ret <4 x i16> %vset_lane
-}
-
-define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
-; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
-entry:
- %vget_lane = extractelement <1 x i64> %vec, i64 0
- ret i64 %vget_lane
-}
-
-define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
-; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov v0.b[0], w0
-; CHECK-NEXT: ret
-entry:
- %vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
- ret <16 x i8> %vset_lane
-}
From 76da0234067b8d5cd05b6d23dc94378c7d5a59b7 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Mon, 28 Jul 2025 16:05:03 +0000
Subject: [PATCH 6/7] Code format changes
---
clang/utils/TableGen/NeonEmitter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 946a799a4f6a5..2dc942ddf6d7f 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1402,7 +1402,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {
CastToType.makeInteger(8, true);
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
} else if (LocalCK == ClassI &&
- (CastToType.isInteger() || CastToType.isPoly())) {
+ (CastToType.isInteger() || CastToType.isPoly())) {
CastToType.makeSigned();
Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
}
From c2dbd50600a1d07b7b0da916b580e77236f00253 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Wed, 30 Jul 2025 08:56:45 +0000
Subject: [PATCH 7/7] - Removed neon-bitcast-poly.c - Updated C tests
 neon-scalar-copy, neon-vget and poly64 to use -fno-lax-vector-conversions
---
.../test/CodeGen/AArch64/neon-bitcast-poly.c | 247 ------------------
clang/test/CodeGen/AArch64/neon-scalar-copy.c | 2 +-
clang/test/CodeGen/AArch64/neon-vget.c | 2 +-
clang/test/CodeGen/AArch64/poly64.c | 2 +-
4 files changed, 3 insertions(+), 250 deletions(-)
delete mode 100644 clang/test/CodeGen/AArch64/neon-bitcast-poly.c
diff --git a/clang/test/CodeGen/AArch64/neon-bitcast-poly.c b/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
deleted file mode 100644
index 6d619ce62d7ed..0000000000000
--- a/clang/test/CodeGen/AArch64/neon-bitcast-poly.c
+++ /dev/null
@@ -1,247 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-
-
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=instcombine | FileCheck %s
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_neon.h>
-
-// CHECK-LABEL: @test_vdupb_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A:%.*]], i64 1
-// CHECK-NEXT: ret i8 [[VGET_LANE]]
-//
-poly8_t test_vdupb_lane_p8(poly8x8_t a){
- return vdupb_lane_p8(a, 1);
-}
-
-// CHECK-LABEL: @test_vdupb_laneq_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[A:%.*]], i64 5
-// CHECK-NEXT: ret i8 [[VGETQ_LANE]]
-//
-poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
- return vdupb_laneq_p8(a, 5);
-}
-
-// CHECK-LABEL: @test_vset_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
-// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
-//
-poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t v){
- return vset_lane_p8(a, v, 3);
-}
-
-// CHECK-LABEL: @test_vset_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
-// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
-//
-poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t v){
- return vset_lane_p16(a, v, 3);
-}
-
-// CHECK-LABEL: @test_vset_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> poison, i64 [[A:%.*]], i64 0
-// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
-//
-poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v){
- return vset_lane_p64(a, v, 0);
-}
-
-// CHECK-LABEL: @test_vsetq_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[V:%.*]], i8 [[A:%.*]], i64 3
-// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
-//
-poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t v){
- return vsetq_lane_p8(a, v, 3);
-}
-
-// CHECK-LABEL: @test_vsetq_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[A:%.*]], i64 3
-// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
-//
-poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t v){
- return vsetq_lane_p16(a, v, 3);
-}
-
-// CHECK-LABEL: @test_vsetq_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[V:%.*]], i64 [[A:%.*]], i64 0
-// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
-//
-poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v){
- return vsetq_lane_p64(a, v, 0);
-}
-
-// CHECK-LABEL: @test_vget_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[V:%.*]], i64 2
-// CHECK-NEXT: ret i8 [[VGET_LANE]]
-//
-poly8_t test_vget_lane_p8(poly8x8_t v){
- return vget_lane_p8(v, 2);
-}
-
-// CHECK-LABEL: @test_vget_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[V:%.*]], i64 2
-// CHECK-NEXT: ret i16 [[VGET_LANE]]
-//
-poly16_t test_vget_lane_p16(poly16x4_t v){
- return vget_lane_p16(v, 2);
-}
-
-// CHECK-LABEL: @test_vget_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[V:%.*]], i64 0
-// CHECK-NEXT: ret i64 [[VGET_LANE]]
-//
-poly64_t test_vget_lane_p64(poly64x1_t v){
- return vget_lane_p64(v, 0);
-}
-
-// CHECK-LABEL: @test_vgetq_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[V:%.*]], i64 2
-// CHECK-NEXT: ret i8 [[VGETQ_LANE]]
-//
-poly8_t test_vgetq_lane_p8(poly8x16_t v){
- return vgetq_lane_p8(v, 2);
-}
-
-// CHECK-LABEL: @test_vgetq_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[V:%.*]], i64 2
-// CHECK-NEXT: ret i16 [[VGETQ_LANE]]
-//
-poly16_t test_vgetq_lane_p16(poly16x8_t v){
- return vgetq_lane_p16(v, 2);
-}
-
-// CHECK-LABEL: @test_vgetq_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[V:%.*]], i64 0
-// CHECK-NEXT: ret i64 [[VGETQ_LANE]]
-//
-poly64_t test_vgetq_lane_p64(poly64x2_t v){
- return vgetq_lane_p64(v, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
-//
-poly8x8_t test_vcopy_lane_p8(poly8x8_t a, poly8x8_t b) {
- return vcopy_lane_p8(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> [[A:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
-//
-poly16x4_t test_vcopy_lane_p16(poly16x4_t a, poly16x4_t b) {
- return vcopy_lane_p16(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <1 x i64> [[B:%.*]]
-//
-poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
- return vcopy_lane_p64(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_lane_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
-//
-poly8x16_t test_vcopyq_lane_p8(poly8x16_t a, poly8x8_t b){
- return vcopyq_lane_p8(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_lane_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[B:%.*]], <4 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
-//
-poly16x8_t test_vcopyq_lane_p16(poly16x8_t a, poly16x4_t b){
- return vcopyq_lane_p16(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_lane_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
-// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
-//
-poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b){
- return vcopyq_lane_p64(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_laneq_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> [[B:%.*]], i64 0
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[A:%.*]], i8 [[VGETQ_LANE]], i64 0
-// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
-//
-poly8x8_t test_vcopy_laneq_p8(poly8x8_t a, poly8x16_t b){
- return vcopy_laneq_p8(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_laneq_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i64 0
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[A:%.*]], i16 [[VGETQ_LANE]], i64 0
-// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
-//
-poly16x4_t test_vcopy_laneq_p16(poly16x4_t a, poly16x8_t b){
- return vcopy_laneq_p16(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopy_laneq_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> poison, <1 x i32> zeroinitializer
-// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
-//
-poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t b){
- return vcopy_laneq_p64(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_laneq_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
-//
-poly8x16_t test_vcopyq_laneq_p8(poly8x16_t a, poly8x16_t b){
- return vcopyq_laneq_p8(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_laneq_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
-//
-poly16x8_t test_vcopyq_laneq_p16(poly16x8_t a, poly16x8_t b){
- return vcopyq_laneq_p16(a, 0, b, 0);
-}
-
-// CHECK-LABEL: @test_vcopyq_laneq_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSET_LANE:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> [[A:%.*]], <2 x i32> <i32 0, i32 3>
-// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
-//
-poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b){
- return vcopyq_laneq_p64(a, 0, b, 0);
-}
diff --git a/clang/test/CodeGen/AArch64/neon-scalar-copy.c b/clang/test/CodeGen/AArch64/neon-scalar-copy.c
index 4ad1ce53a3b39..bd80068e5bf85 100644
--- a/clang/test/CodeGen/AArch64/neon-scalar-copy.c
+++ b/clang/test/CodeGen/AArch64/neon-scalar-copy.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none\
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
diff --git a/clang/test/CodeGen/AArch64/neon-vget.c b/clang/test/CodeGen/AArch64/neon-vget.c
index b17a7ab342817..ebc8c2f9228ed 100644
--- a/clang/test/CodeGen/AArch64/neon-vget.c
+++ b/clang/test/CodeGen/AArch64/neon-vget.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon -flax-vector-conversions=none \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg | FileCheck %s
diff --git a/clang/test/CodeGen/AArch64/poly64.c b/clang/test/CodeGen/AArch64/poly64.c
index 578dd2054dc66..00838b648dfe6 100644
--- a/clang/test/CodeGen/AArch64/poly64.c
+++ b/clang/test/CodeGen/AArch64/poly64.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none\
// RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s